Introduction

We start by loading the rdata provided by Cédric who has imported and edited all the xlsx files. He also provides a very good overview of the content here. Based on this job, we will try to carry out a similar analysis as for seasonality. More specifically, we can use the same Bayesian model to make a clustering of time series. For each stage, we will build a data set that gives for each season, and each EMU (and perhaps habitat), the proportion of catches per month.

For convenience, we rename the data.frame columns with names consistent with the seasonality data set.

# Align column names with those used in the seasonality data set.
res <- res %>%
  rename(das_year = year, das_month = month, das_value = value)

Glass Eel

First, let’s select data corresponding to glass eel stage.

# Keep only glass eel records (life stage code "G").
glass_eel <- subset(res, res$lfs_code=="G")

# we start by removing rows with only zero: flag every
# EMU x stage x habitat x year combination whose annual total is zero...
all_zero <- glass_eel %>%   group_by(emu_nameshort,lfs_code,hty_code,das_year) %>%
        summarize(S=sum(das_value)) %>% 
    filter(S==0)

# ...and drop the corresponding rows from the glass eel data.
glass_eel <- glass_eel %>% 
      anti_join(all_join <- all_zero)
## Joining, by = c("das_year", "emu_nameshort", "lfs_code", "hty_code")
#For glass eel, we aggregate data per habitat
glass_eel <- glass_eel %>%
  select(das_year, das_month, das_value, emu_nameshort, cou_code) %>%
  group_by(das_year, das_month, emu_nameshort, cou_code) %>%
  summarise(das_value=sum(das_value))

Similarly to seasonality, we will build seasons. For glass eels, seasons are rather consistent across the whole of Europe, so we use the same definition as in seasonality: here, we split in October (start of catches in Spain), and a season y will correspond to October–December of year y-1 and January to September of year y.

# A season y runs from October of year y-1 through September of year y:
# catches in Oct-Dec are attributed to the following year's season.
in_autumn <- glass_eel$das_month > 9
glass_eel$season <- glass_eel$das_year + ifelse(in_autumn, 1, 0)
# month_in_season: 1 = October, 2 = November, ..., 12 = September
glass_eel$month_in_season <- as.factor(
  ifelse(in_autumn,
         glass_eel$das_month - 9,
         glass_eel$das_month + 3))

# Season 2020 is incomplete at the time of the analysis, so drop it.
glass_eel <- filter(glass_eel, season < 2020)

Data selection

Now we should carry out data selection; more specifically, we want to eliminate rows with too many missing data or too many zeros, and to check that there are no duplicates (though Cedric already did it).

# For each EMU, determine which seasons have good enough monthly coverage.
# good_coverage_wave() is a project helper; "G" = glass eel stage.
kept_seasons <- lapply(unique(glass_eel$emu_nameshort), function(emu) {
  one_emu <- subset(glass_eel, glass_eel$emu_nameshort == emu)
  good_coverage_wave(one_emu, "G")
})
## [1] "For  ES_Astu  a good season should cover months: 11 to 4"
## [1] "For  ES_Cata  a good season should cover months: 10 to 4"
## [1] "For  FR_Adou  a good season should cover months: 11 to 3"
## [1] "For  FR_Arto  a good season should cover months: 1 to 4"
## [1] "For  FR_Bret  a good season should cover months: 12 to 4"
## [1] "For  FR_Garo  a good season should cover months: 11 to 4"
## [1] "For  FR_Loir  a good season should cover months: 12 to 4"
## [1] "For  FR_Sein  a good season should cover months: 1 to 5"
## [1] "For  ES_Basq  a good season should cover months: 11 to 2"
## [1] "For  GB_SouW  a good season should cover months: 2 to 6"
## [1] "For  GB_NorW  a good season should cover months: 2 to 7"
## [1] "For  GB_Seve  a good season should cover months: 3 to 6"
## [1] "For  GB_Wale  a good season should cover months: 2 to 6"
## [1] "For  ES_Mino  a good season should cover months: 11 to 3"
## [1] "For  ES_Vale  a good season should cover months: 12 to 3"
## [1] "For ES_MINH not possible to define a season"
## [1] "For  ES_Cant  a good season should cover months: 11 to 3"

Finally, here are the series kept given previous criterion.

# Label each element with its EMU and display only EMUs with kept seasons.
names(kept_seasons) <- unique(glass_eel$emu_nameshort)
Filter(Negate(is.null), kept_seasons)
## $ES_Astu
##  [1] 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014
## [15] 2015 2016 2017 2018 2019
## 
## $ES_Cata
##  [1] 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014
## [15] 2015 2016 2017 2018 2019
## 
## $FR_Adou
##  [1] 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014
## [15] 2015 2016 2017 2018
## 
## $FR_Arto
##  [1] 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013
## [15] 2014 2015 2016 2017 2018
## 
## $FR_Bret
##  [1] 2001 2002 2003 2004 2005 2006 2007 2009 2010 2011 2012 2013 2014 2015
## [15] 2016 2017 2018
## 
## $FR_Garo
##  [1] 2001 2002 2003 2004 2005 2006 2007 2009 2010 2011 2012 2013 2014 2015
## [15] 2016 2017 2018
## 
## $FR_Loir
##  [1] 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014
## [15] 2015 2016 2017 2018
## 
## $FR_Sein
##  [1] 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018
## 
## $ES_Basq
##  [1] 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018
## [15] 2019
## 
## $GB_SouW
## [1] 2005 2006 2007 2008 2009 2013
## 
## $GB_NorW
## [1] 2008
## 
## $GB_Seve
## [1] 2005 2006 2007 2008 2009
## 
## $GB_Wale
## [1] 2005 2006 2007 2008 2009
## 
## $ES_Mino
## [1] 2011 2012 2013 2014 2015 2016 2017 2018 2019
## 
## $ES_Vale
## [1] 2011 2012 2013 2014 2015 2016 2017 2018 2019
## 
## $ES_Cant
## [1] 2014 2015 2016 2017 2018 2019

Data preparation

We carry out the same procedure as for seasonality.

# Keep only the (EMU, season) pairs that passed the coverage criterion.
glasseel_subset <- subset(glass_eel, 
                           mapply(function(season, series){
                             season %in% kept_seasons[[series]]
                           }, glass_eel$season, glass_eel$emu_nameshort))


# One row per EMU x season, one column per month of catch.
glasseel_wide <- pivot_wider(data=glasseel_subset[, c("emu_nameshort",
                                                     "cou_code",
                                                     "season",
                                                     "das_month",
                                                     "das_value")],
                                names_from="das_month",
                                values_from="das_value")
# Prefix month columns with "m" (m1..m12); first three columns are the keys.
names(glasseel_wide)[-(1:3)] <- paste("m",
                                       names(glasseel_wide)[-(1:3)],
                                       sep="")

###we count the number of zeros per lines to remove lines without enough
###fishes (NA months count as zeros; tot = total annual catch)
data_poor <- data.frame(glasseel_wide$emu_nameshort,
                        glasseel_wide$season,
                  zero=rowSums(glasseel_wide[, -(1:3)] == 0 |
                                 is.na(glasseel_wide[, -(1:3)])),
           tot=rowSums(glasseel_wide[, -(1:3)], na.rm=TRUE))

# Keep rows with fewer than 10 zero/NA months and more than 30 (kg) total.
glasseel_wide <- glasseel_wide[data_poor$zero < 10 & data_poor$tot>30, ]

# NOTE(review): the table below uses tot < 50 while the filter above keeps
# tot > 30 — rows with 30 < tot < 50 are displayed but not removed; confirm
# which threshold is intended.
table_datapoor(data_poor %>% filter(zero > 9 | tot<50)) #rows considered "data poor"
“data poor” situation
EMU season number of zero total catch
GB_Wale 2009 7 13.51
FR_Arto 2010 10 112.00
ES_Vale 2018 10 0.70
ES_Vale 2019 11 39.00

It leads to a dataset with 189 rows.

We now replace NA values by zero, since we selected our data series so that missing months correspond to insignificant months / closed months, and we compute proportions per month for each year.

# Replace missing months by zero: series were selected so that missing
# months correspond to insignificant or closed months.
glasseel_wide <- glasseel_wide %>%
  replace_na(replace=list(m1=0,
                          m2=0,
                          m3=0,
                          m4=0,
                          m5=0,
                          m6=0,
                          m7=0,
                          m8=0,
                          m9=0,
                          m10=0,
                          m11=0,
                          m12=0))
month_cols <- paste0("m", 1:12)
# Add a tiny constant so no month is exactly zero (the Dirichlet-type model
# cannot handle exact zero proportions).
glasseel_wide[, month_cols] <- glasseel_wide[, month_cols] + 1e-3
# Total catch per row (season), then convert months to proportions.
total_catch_year <- rowSums(glasseel_wide[, month_cols])
glasseel_wide <- glasseel_wide %>%
  mutate(across(all_of(month_cols), ~ .x / total_catch_year))

The Commission asks us to compare the pattern before and after 2007, probably to see the effect of the Eel Regulation. It is therefore necessary to build a period index. However, since most countries implemented their EMPs only in 2009/2010, we split in 2010.

# Period 1 = seasons up to 2009, period 2 = 2010 onwards (most countries
# implemented their EMPs in 2009/2010).
glasseel_wide$period <- ifelse(glasseel_wide$season > 2009, 2, 1)

# Number of seasons available per EMU in each period.
kable(table(glasseel_wide$period, glasseel_wide$emu_nameshort),
      caption = "number of seasons per period",
      row.names = TRUE)
number of seasons per period
ES_Astu ES_Basq ES_Cant ES_Cata ES_Mino ES_Vale FR_Adou FR_Arto FR_Bret FR_Garo FR_Loir FR_Sein GB_NorW GB_Seve GB_SouW GB_Wale
1 9 5 0 9 0 0 9 10 8 8 9 1 1 5 5 4
2 10 10 6 10 9 7 9 8 9 9 9 9 0 0 1 0

The situation is well balanced between the two periods.

Running the model

# One model group = one EMU x period combination (integer coded).
group <- as.integer(interaction(glasseel_wide$emu_nameshort,
                                glasseel_wide$period,
                                drop = TRUE))
# Number of seasons observed in each group.
nb_occ_group <- table(group)
# Response matrix: one row per season, one column per monthly proportion.
y <- as.matrix(glasseel_wide[, paste0("m", 1:12)])

Now, we make a loop to select the number of clusters based on a DIC criterion

# Model selection: fit the clustering model for 2..7 clusters in parallel
# (3 workers; 'FORK' means workers inherit the session — Unix only).
cl <- makeCluster(3, 'FORK')
comparison <- parSapply(cl, 2:7,
       function(nbclus){
         mydata <- build_data(nbclus)
         adapted <- FALSE
         # JAGS adaptation may fail; adapt.incomplete="error" turns an
         # incomplete adaptation into an error, and we retry until it works.
         while (!adapted){
           tryCatch({
             runjags.options(adapt.incomplete="error")
             res <- run.jags("jags_model.txt", monitor= c("deviance",
                                                          "alpha_group",
                                                          "cluster"),
                        summarise=FALSE, adapt=40000, method="parallel",
                        sample=2000,burnin=100000,n.chains=1,
                        inits=generate_init(nbclus, mydata)[[1]],
                        data=mydata)
                        adapted <- TRUE
                        res_mat <- as.matrix(as.mcmc.list(res))
                        # Median silhouette across iterations (project helper).
                        silhouette <- median(compute_silhouette(res_mat))
                        # Number of clusters actually occupied per iteration.
                        nbused <- apply(res_mat, 1, function(iter){
                          length(table(iter[grep("cluster",
                                                 names(iter))]))
                        })
                        # DIC approximation: mean deviance + half its posterior
                        # variance (column 1 is the "deviance" monitor).
                        dic <- mean(res_mat[,1])+0.5*var(res_mat[,1])
                        stats <- c(dic,silhouette,mean(nbused))
                  }, error=function(e) {
                    print(paste("not adapted, restarting nbclus",nbclus))
                    }
                  )
         }
         # One column per candidate size: (dic, silhouette, mean nb used).
         stats
      })
stopCluster(cl)
# Assemble the comparison table: one row per candidate number of clusters.
best_glasseel_landings <- data.frame(nbclus=2:(ncol(comparison)+1),
                                              dic=comparison[1, ],
                                              silhouette=comparison[2, ],
                                     used=comparison[3,])
save(best_glasseel_landings, file="glasseel_landings_jags.rdata")
# Reload from the file just written. The original called
# load("best_glasseel_landings"), which is the object name, not the file
# name saved above — that call could never find a file.
load("glasseel_landings_jags.rdata")
best_glasseel_landings
##   nbclus       dic
## 1      2 -17814.18
## 2      3 -17640.64
## 3      4 -17336.52
## 4      5 -16888.02
## 5      6 -16522.37
## 6      7 -16043.03

Given that the number of used clusters does not increase much after 4 and that the silhouette tends to decrease, we use 4 clusters.

# Fit the final model with the retained number of clusters.
nbclus <- 4
mydata <- build_data(4)


# As in the selection loop, adaptation may fail and is simply retried.
adapted <- FALSE
while (!adapted){
   tryCatch({
      runjags.options(adapt.incomplete="error")
      # NOTE: the original monitor vector listed "cluster" and "centroid"
      # twice; the duplicates are removed here.
      myfit_glasseel_landings <- run.jags("jags_model.txt",
                      monitor= c("cluster", "esp", "alpha_group",
                                 "centroid", "centroid_group",
                                 "distToClust", "duration_clus",
                                 "duration_group",
                                 "lambda", "id_cluster"),
                      summarise=FALSE, adapt=50000, method="parallel",
                      sample=10000,burnin=200000,n.chains=1, thin=5,
                      inits=generate_init(nbclus, mydata)[[1]], data=mydata)
      adapted <- TRUE
    }, error=function(e) {
       print(paste("not adapted, restarting nbclus",nbclus))
    })
}

# Store both the fit and the model-comparison table for later chunks.
save(myfit_glasseel_landings, best_glasseel_landings,
     file="glasseel_landings_jags.rdata")

Results

Once fitted, we can plot monthly pattern per cluster

load("glasseel_landings_jags.rdata")
nbclus <- 4
mydata <-build_data(4)
# Extract, for each cluster, the posterior samples of the expected monthly
# proportions (the "esp" monitor, indexed esp[cluster,month]) in long format.
# NOTE(review): if called with type != "cluster", sub_mat is never defined
# and the function errors; only the default is used in this document.
get_pattern_month <- function(res,type="cluster"){
  res_mat <- as.matrix(as.mcmc.list(res, add.mutate=FALSE))
  if (type=="cluster"){
    sub_mat <- as.data.frame(res_mat[,grep("esp",colnames(res_mat))])
  }
  # One row per (iteration, parameter); param is e.g. "esp[2,11]".
  sub_mat <- sub_mat %>% 
    pivot_longer(cols=1:ncol(sub_mat),
                 names_to="param",
                 values_to="proportion")
  # Split "esp[c,m]" on punctuation: element 2 = cluster, element 3 = month.
  tmp <- lapply(as.character(sub_mat$param),function(p) strsplit(p,"[[:punct:]]"))
  sub_mat$cluster<-as.factor(
    as.integer(lapply(tmp, function(tt) tt[[1]][2])))
  sub_mat$month <- as.character(lapply(tmp,
                                       function(tt) paste("m",
                                                          tt[[1]][3],
                                                          sep="")))
  # Order months m1..m12 for plotting.
  sub_mat$month <- factor(sub_mat$month, levels=paste("m", 1:12, sep=""))
  sub_mat
}

pat <-get_pattern_month(myfit_glasseel_landings)
#we number clusters in chronological order from november to october
# (relabelling: original cluster 3 -> 1, 1 -> 2, 4 -> 3, 2 -> 4; levels run
# to 7, presumably so colours stay comparable across stage analyses — confirm
# against the shared palette `cols`)
pat$cluster <- factor(match(pat$cluster,c("3","1","4","2")),
                      levels=as.character(1:7))
# Boxplots of the posterior monthly proportions, one panel per cluster.
ggplot(pat,aes(x=month,y=proportion))+
  geom_boxplot(aes(fill=cluster),outlier.shape=NA) +
  scale_fill_manual(values=cols)+facet_wrap(.~cluster, ncol=1) +
  theme_bw()

We compute some statistics to characterize the clusters.

table_characteristics(myfit_glasseel_landings, 4)
characteristics of clusters
number of months to reach 80% of total
month of centroid
cluster median q2.5% q97.5% median q2.5% q97.5%
1 4 3 4 1.40 1.34 1.46
2 6 4 7 0.52 10.93 2.28
3 3 3 3 0.31 0.26 0.36
4 3 3 3 3.25 3.20 3.31

Duration indicates the minimum number of months that covers 80% of the wave (1st column is the median, and the 2 next one quantiles 2.5% and 97.5% of credibility intervals). Centroid is the centroid of the migration wave (e.g. 11.5 would indicate a migration centred around mid november). The first column is the median and the two next one the quantiles 2.5 and 97.5%.

Cluster 1 starts in autumn and lasts until January. Cluster 2 is shifted one month later and lasts longer. Cluster 3 corresponds to catches in March–May. Cluster 4 is very flat and is not really attributed.

We can also look at the belonging of the different groups.

# Rebuild the EMU x period groups in the same order used to fit the model.
groups <- interaction(glasseel_wide$emu_nameshort,
                                            glasseel_wide$period,
                                            drop=TRUE)
group_name <- levels(groups)

# For each group, the modal cluster over MCMC iterations plus the frequency
# of each cluster.
# NOTE(review): this redefines (shadows) the get_pattern_month() used above
# for the monthly patterns, and the `mydata` argument is never used; the
# function also relies on the globals group_name, groups and nbclus.
get_pattern_month <- function(res,mydata){
  
  # Group labels are "EMU.period": split into series name and period.
  tmp <- strsplit(as.character(group_name),
                  "\\.")
  ser <- as.character(lapply(tmp,function(tt){
    tt[1]
  }))
  period <- as.character(lapply(tmp,function(tt){
    tt[2]
  }))
  res_mat <- as.matrix(as.mcmc.list(res,add.mutate=FALSE))
  
  # For each group id, tabulate the sampled cluster memberships, keeping
  # the most frequent cluster and the full frequency table.
  clus <- t(sapply(seq_len(length(unique(groups))), function(id){
    name_col <- paste("cluster[",id,"]",sep="")
    freq <- table(res_mat[,name_col])
    max_class <- names(freq)[order(freq,decreasing=TRUE)[1]]
    c(max_class,freq[as.character(1:nbclus)])
  }))
  storage.mode(clus) <- "numeric"
  classes <- as.data.frame(clus)
  names(classes) <- c("cluster", paste("clus",seq_len(nbclus),sep=""))
  cbind.data.frame(data.frame(ser=ser, period=period),
                   classes)
}

myclassif <- get_pattern_month(myfit_glasseel_landings)
# Apply the same chronological relabelling of clusters as for the monthly
# patterns: original cluster 3 -> 1, 1 -> 2, 4 -> 3, 2 -> 4.
myclassif$cluster <- factor(match(myclassif$cluster,c("3","1","4","2")),
                            levels=as.character(1:7))

# Classification table: modal cluster and membership percentages per group.
table_classif(myclassif)
EMU period Max cluster % clus 1 % clus 2 % clus 3 % clus 4
ES_Astu 2 1 0 0 100 0
ES_Basq 1 1 0 0 100 0
ES_Basq 2 1 0 0 100 0
ES_Cant 2 1 0 0 100 0
ES_Cata 1 1 0 0 100 0
ES_Cata 2 1 0 0 100 0
ES_Mino 2 1 0 0 100 0
FR_Adou 1 1 0 0 100 0
FR_Adou 2 1 0 0 100 0
ES_Astu 1 2 89 0 11 0
ES_Vale 2 2 100 0 0 0
FR_Bret 1 2 100 0 0 0
FR_Bret 2 2 100 0 0 0
FR_Garo 1 2 100 0 0 0
FR_Garo 2 2 100 0 0 0
FR_Loir 1 2 100 0 0 0
FR_Loir 2 2 100 0 0 0
FR_Arto 1 3 0 0 0 100
FR_Arto 2 3 0 0 0 100
FR_Sein 1 3 0 0 0 100
FR_Sein 2 3 0 0 0 100
GB_NorW 1 3 0 0 0 100
GB_Seve 1 3 0 0 0 100
GB_SouW 1 3 0 0 0 100
GB_SouW 2 3 0 0 0 100
GB_Wale 1 3 0 0 0 100

The spatial pattern is obvious in the results. Interestingly, we saw an EMU that changed cluster between periods, and this seems to correspond to management measures that have effectively shortened the fishing season.

# Split the classification by period and map each EMU polygon to its
# modal cluster in each period.
myclassif_p1 <- subset(myclassif, myclassif$period == 1)
myclassif_p2 <- subset(myclassif, myclassif$period == 2)

# The original wrapped ser in substr(ser, 1, nchar(ser)), which is a no-op
# (ser is already the plain EMU name); match directly on ser instead.
emu$cluster1 <- factor(
  myclassif_p1$cluster[match(emu$name_short, myclassif_p1$ser)],
  levels = 1:7)

emu$cluster2 <- factor(
  myclassif_p2$cluster[match(emu$name_short, myclassif_p2$ser)],
  levels = 1:7)

# Map of cluster membership per EMU, period 1 (pre-2010).
ggplot(data = cou) +  geom_sf(fill= "antiquewhite") +
        geom_sf(data=emu,aes(fill=cluster1)) + scale_fill_manual(values=cols)+
  theme_bw() +xlim(-20,30) + ylim(35,65) 

# Same map for period 2 (2010 onwards).
ggplot(data = cou) +  geom_sf(fill= "antiquewhite") +
        geom_sf(data=emu,aes(fill=cluster2)) + scale_fill_manual(values=cols)+
  theme_bw() +xlim(-20,30) + ylim(35,65)  

Exporting pattern per group

# Posterior samples as a matrix; column names are used to index parameters.
tmp <- as.matrix(as.mcmc.list(myfit_glasseel_landings))
name_col <- colnames(tmp)

# Median monthly pattern for every EMU x period group, exported for reuse.
pattern_GE_landings <- do.call(
  "rbind.data.frame",
  lapply(seq_along(levels(groups)), function(g) {
    median_pattern_group(g, group_name, tmp, "G", "landings", hty_code = "T")
  })
)
save(pattern_GE_landings, file = "pattern_G_landings.rdata")

Similarity between and after 2010

#which groups have data in both periods (the EMU appears twice in the
#unique EMU x period table)
occ=table(unique(glasseel_wide[,c("emu_nameshort", "period")])[,1])
tocompare=names(occ)[which(occ>1)]

# Posterior similarity between the two periods of an EMU: per MCMC
# iteration, sum over months of the pointwise minimum of the two monthly
# profiles (1 = identical patterns); report 2.5/50/97.5% quantiles.
# NOTE(review): grep(s, group_name) assumes no EMU name is a substring of
# another, and alpha_group[g,1..12] columns are assumed to come back in
# month order 1..12 — confirm both.
simi=sapply(tocompare, function(s){
  g=grep(s,group_name)
  esp1=tmp[,grep(paste("alpha_group\\[",g[1],",",sep=""),name_col)]
  esp2=tmp[,grep(paste("alpha_group\\[",g[2],",",sep=""),name_col)]
  quantile(apply(cbind(esp1,esp2),
                 1,
                 function(x) sum(pmin(x[1:12],x[13:24]))),
           probs=c(0.025,.5,.975))
})

similarity=data.frame(emu=tocompare,t(simi))

# Render the similarity table (project helper).
table_similarity(similarity)
similarity
EMU q2.5% median q97.5%
ES_Astu 0.74 0.83 0.91
ES_Basq 0.64 0.74 0.83
ES_Cata 0.77 0.86 0.93
FR_Adou 0.65 0.75 0.85
FR_Arto 0.64 0.71 0.79
FR_Bret 0.73 0.83 0.92
FR_Garo 0.72 0.83 0.91
FR_Loir 0.78 0.88 0.95
FR_Sein 0.60 0.77 0.91
GB_SouW 0.55 0.72 0.88

Potential effect of EMP and EU closures

# Split each group label "EMU.period" back into its parts: the final
# character is the period digit, and dropping the last two characters
# ("." + digit) leaves the EMU name.
ncar=nchar(group_name)
period=as.integer(substr(as.character(group_name),ncar,ncar))
emus=substr(group_name,1,ncar-2)



#######EMP
#For glass eels, we summed catches over hty, therefore here, we aggregate closures
#taking the most restrictive if there are differences among habitats
list_period1=data.frame(emu_nameshort=emus[period==1])
list_period1$group=group_name[period==1]
list_period1$id_g=match(list_period1$group,group_name)

#we check that we have landings data at least two years before the first EMP closures
# estimable: the EMU has at least one glass-eel ("G") EMP closure outside
# pure freshwater ("F")...
list_period1$estimable=sapply(list_period1$emu_nameshort, function(s) {
  length(which(charac_EMP_closures$emu_nameshort==s 
               & grepl("G",charac_EMP_closures$lfs_code) 
               & charac_EMP_closures$hty_code != "F"))>0})

# ...and the group's first landings season is more than two years before
# the EMU's first EMP closure year.
list_period1$estimable=list_period1$estimable &
(sapply(list_period1$id_g,function(e) min(glasseel_wide$season[group==e]))+2 <
sapply(list_period1$emu_nameshort,function(e) min(charac_EMP_closures$year[charac_EMP_closures$emu_nameshort==e &
                                                           grepl("G",charac_EMP_closures$lfs_code) &
                                                    charac_EMP_closures$hty_code !="F"])))

# Quantiles of the proportion of annual catch potentially lost to closures.
list_period1$lossq2.5=NA
list_period1$lossq50=NA
list_period1$lossq97.5=NA

# For each estimable EMU: take the most restrictive closure percentage per
# month across habitats, multiply by the posterior monthly proportions
# (alpha_group) and sum over closed months -> posterior of proportion lost.
res_closures=mapply(function(s,g) {
  emu_closures <- EMP_closures %>%
    filter(emu_nameshort==s & grepl("G",lfs_code) & hty_code !="F") %>%
    group_by(emu_nameshort,month) %>%
    summarize(fishery_closure_percent=max(fishery_closure_percent))
  myalpha=tmp[,paste("alpha_group[",g,",",emu_closures$month,"]",sep="")]
  if (nrow(emu_closures)>1){
    loss=colSums(apply(myalpha,1,function(x) x*emu_closures$fishery_closure_percent/100))
  } else {
    loss=myalpha*emu_closures$fishery_closure_percent/100
  }
  quantile(loss,probs=c(0.025,.5,.975))
},as.character(list_period1$emu_nameshort[list_period1$estimable]),list_period1$id_g[list_period1$estimable])

# NOTE(review): t(res_closures) assumes mapply returned a 3 x n matrix;
# with a single estimable EMU the simplification could differ — verify.
list_period1[list_period1$estimable, c("lossq2.5", "lossq50","lossq97.5")] =
  t(res_closures)

kable(list_period1[,c("emu_nameshort","lossq2.5","lossq50","lossq97.5")],
      col.names=c("emu","q2.5","median","q97.5"),
      caption="proportion of catch potentially lost because of EMP closure",
      digits=2)
proportion of catch potentially lost because of EMP closure
emu q2.5 median q97.5
ES_Astu 0.07 0.10 0.14
ES_Basq NA NA NA
ES_Cata NA NA NA
FR_Adou 0.04 0.05 0.07
FR_Arto 0.11 0.14 0.17
FR_Bret 0.06 0.08 0.11
FR_Garo 0.07 0.09 0.12
FR_Loir 0.04 0.04 0.06
FR_Sein NA NA NA
GB_NorW NA NA NA
GB_Seve 0.13 0.16 0.20
GB_SouW 0.48 0.51 0.55
GB_Wale 0.77 0.80 0.82
#######EU
# Same computation as for EMP closures above, but against the EU closures.
#For glass eels, we summed catches over hty, therefore here, we aggregate closures
#taking the most restrictive if there are differences among habitats
list_period2=data.frame(emu_nameshort=emus[period==2])
list_period2$group=group_name[period==2]
list_period2$id_g=match(list_period2$group,group_name)

#we check that we have landings data at least two years before the first EU closures
# estimable: glass-eel ("G") EU closure outside pure freshwater ("F")...
list_period2$estimable=sapply(list_period2$emu_nameshort, function(s) {
  length(which(charac_EU_closures$emu_nameshort==s 
               & grepl("G",charac_EU_closures$lfs_code) 
               & charac_EU_closures$hty_code != "F"))>0})

# ...and the group's first landings season is more than two years before
# the EMU's first EU closure year.
list_period2$estimable=list_period2$estimable &
(sapply(list_period2$id_g,function(e) min(glasseel_wide$season[group==e]))+2 <
sapply(list_period2$emu_nameshort,function(e) min(charac_EU_closures$year[charac_EU_closures$emu_nameshort==e &
                                                           grepl("G",charac_EU_closures$lfs_code) &
                                                    charac_EU_closures$hty_code !="F"])))

# Quantiles of the proportion of annual catch potentially lost to closures.
list_period2$lossq2.5=NA
list_period2$lossq50=NA
list_period2$lossq97.5=NA

# Most restrictive closure percentage per month x posterior monthly
# proportions, summed over closed months -> posterior of proportion lost.
res_closures=mapply(function(s,g) {
  emu_closures <- EU_closures %>%
    filter(emu_nameshort==s & grepl("G",lfs_code) & hty_code !="F") %>%
    group_by(emu_nameshort,month) %>%
    summarize(fishery_closure_percent=max(fishery_closure_percent))
  myalpha=tmp[,paste("alpha_group[",g,",",emu_closures$month,"]",sep="")]
  if (nrow(emu_closures)>1){
    loss=colSums(apply(myalpha,1,function(x) x*emu_closures$fishery_closure_percent/100))
  } else {
    loss=myalpha*emu_closures$fishery_closure_percent/100
  }
  quantile(loss,probs=c(0.025,.5,.975))
},as.character(list_period2$emu_nameshort[list_period2$estimable]),list_period2$id_g[list_period2$estimable])

# NOTE(review): t(res_closures) assumes mapply returned a 3 x n matrix;
# with a single estimable EMU the simplification could differ — verify.
list_period2[list_period2$estimable, c("lossq2.5", "lossq50","lossq97.5")] =
  t(res_closures)

kable(list_period2[,c("emu_nameshort","lossq2.5","lossq50","lossq97.5")],
      col.names=c("emu","q2.5","median","q97.5"),
      caption="proportion of catch potentially lost because of EU closure",
      digits=2)
proportion of catch potentially lost because of EU closure
emu q2.5 median q97.5
ES_Astu NA NA NA
ES_Basq NA NA NA
ES_Cant NA NA NA
ES_Cata NA NA NA
ES_Mino NA NA NA
ES_Vale NA NA NA
FR_Adou NA NA NA
FR_Arto NA NA NA
FR_Bret NA NA NA
FR_Garo NA NA NA
FR_Loir NA NA NA
FR_Sein NA NA NA
GB_SouW 0.01 0.05 0.14
# Tag each table with its closure type, stack them and export for the
# cross-stage synthesis.
list_period1$type <- "EMP closure"
list_period2$type <- "EU closure"
list_period <- rbind.data.frame(list_period1, list_period2)
list_period$stage <- "G"
save(list_period, file = "loss_glass_eel.rdata")

Yellow

First, let’s select data corresponding to yellow stage.

# Keep only yellow eel records (life stage code "Y").
yellow_eel <- subset(res, res$lfs_code=="Y")

# we start by removing rows with only zero: flag EMU x stage x habitat x
# year combinations whose annual total is zero and drop them.
all_zero <- yellow_eel %>%  group_by(emu_nameshort,lfs_code,hty_code,das_year) %>%
        summarize(S=sum(das_value)) %>% 
    filter(S==0)

yellow_eel <- yellow_eel %>% 
      anti_join(all_zero)
## Joining, by = c("das_year", "emu_nameshort", "lfs_code", "hty_code")
table(yellow_eel$hty_code)
## 
##    C    F   FC  FTC   MO    T 
##  645 1212  463   72  225  942
#We have many data, so we remove "FC" and "FTC" which are weird habitat mixes
yellow_eel <- yellow_eel %>%
  filter(!hty_code %in% c("FTC", "FC"))

#in this analysis, the unit will correspond to EMU / habitat so we create 
#corresponding column (e.g. "IE_West_F" = EMU IE_West, freshwater habitat)
yellow_eel$emu <- yellow_eel$emu_nameshort
yellow_eel$emu_nameshort <- paste(yellow_eel$emu_nameshort,
                                   yellow_eel$hty_code, sep="_")

#There are some duplicates for IE_West_F that should be summed up according to
#Russel
summed_up_IE <-yellow_eel %>%
  filter(yellow_eel$emu_nameshort=="IE_West_F") %>%
  group_by(das_year,das_month) %>%
  summarize(das_value=sum(das_value))

# Drop the duplicated rows (keeping the first of each year x month x unit).
yellow_eel <- yellow_eel %>% 
  distinct(das_year,das_month,emu_nameshort, .keep_all = TRUE)

# Overwrite the remaining IE_West_F rows with the summed values.
# NOTE(review): this assumes the rows kept by distinct() appear in the same
# year/month order and have the same row count as the summarised table —
# confirm.
yellow_eel[yellow_eel$emu_nameshort=="IE_West_F",
          c("das_year","das_month","das_value") ] <- summed_up_IE

Similarly to seasonality, we will build seasons. We reuse the procedure made for silver eel and yellow eel seasonality, i.e. defining seasons per EMU, with the season starting at the month with minimum landings. The month with the lowest catch fmin defines the beginning of the season (month_in_season=1), and season y stands for the 12 months from fmin of year y (e.g., if the lowest migration is in December, the season ranges from December to November, and season y denotes the season from December y to November y+1).

#creating season
yelloweel <- do.call("rbind.data.frame",
                     lapply(unique(yellow_eel$emu_nameshort),
                            function(s)
                              season_creation(yellow_eel[yellow_eel$emu_nameshort==s,])))
months_peak_per_series<- unique(yelloweel[,c("emu_nameshort","peak_month")])

#large variety in the month with peak of catches among EMU / habitat
kable(table(months_peak_per_series$peak_month),
      caption="number of EMUs that peak in a month",
      col.names=c("month","number of EMUs"))
number of EMUs that peak in a month
month number of EMUs
4 2
5 7
6 8
7 12
8 6
9 5
10 1
12 2
#we remove data from season 2020
yelloweel <- subset(yelloweel, season < 2020)

Coastal/marine waters

Data selection

Now we should carry out data selection; more specifically, we want to eliminate rows with too many missing data or too many zeroes, and to check whether there are any duplicates (though Cedric already did it). We mixed coastal and marine habitats since there is only one EMU with landings in a marine habitat.

#pool coastal (C) and marine (MO) habitats: only one EMU has marine landings
yelloweel_coatal <- subset(yelloweel, yelloweel$hty_code %in% c("C", "MO"))
#for each series, keep the seasons with good monthly coverage
#(good_coverage_wave) and a non-trivial amount of landings
kept_seasons <- lapply(unique(yelloweel_coatal$emu_nameshort), function(s){
  sub_yellow <- subset(yelloweel_coatal, yelloweel_coatal$emu_nameshort==s)
  kept <- good_coverage_wave(sub_yellow)
  #we remove seasons in which we have less than 50 kg of landings
  #(vapply rather than sapply: keeps a typed logical result even when
  #`kept` is empty, where sapply would return an empty list)
  if(!is.null(kept))
    kept <- kept[vapply(kept, function(k)
      sum(sub_yellow$das_value[sub_yellow$season==k], na.rm=TRUE) > 50,
      logical(1))]
  if (length(kept) == 0) kept <- NULL
  kept
})
## [1] "For  DE_Eide_C  a good season should cover months: 5 to 11"
## [1] "For  DE_Schl_C  a good season should cover months: 5 to 11"
## [1] "For  DK_total_MO  a good season should cover months: 4 to 11"
## [1] "For  ES_Murc_C  a good season should cover months: 11 to 3"
## [1] "For  GB_Angl_C  a good season should cover months: 5 to 11"
## [1] "For  GB_NorW_C  a good season should cover months: 5 to 8"
## [1] "For  GB_SouE_C  a good season should cover months: 4 to 10"
## [1] "For  GB_SouW_C  a good season should cover months: 4 to 11"
## [1] "For  GB_Tham_C  a good season should cover months: 5 to 4"
## [1] "For  SE_East_C  a good season should cover months: 4 to 11"
## [1] "For  SE_West_C  a good season should cover months: 5 to 11"

Finally, here are the series kept given previous criterion.

#label each element with its series name, then display only the series that
#kept at least one season (vapply: typed result, safe on empty lists)
names(kept_seasons) <- unique(yelloweel_coatal$emu_nameshort)
kept_seasons[!vapply(kept_seasons, is.null, logical(1))]
## $DE_Eide_C
##  [1] 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018
## 
## $DE_Schl_C
##  [1] 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018
## 
## $DK_total_MO
##  [1] 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013
## [15] 2014 2015 2016 2017 2018 2019
## 
## $ES_Murc_C
## [1] 2014 2016
## 
## $GB_Angl_C
## [1] 2014 2015 2016 2017 2018
## 
## $GB_SouE_C
## [1] 2013 2014 2015 2016 2017
## 
## $GB_SouW_C
## [1] 2013 2014 2015 2016 2017
## 
## $SE_East_C
##  [1] 2000 2001 2002 2003 2004 2005 2006 2007 2008 2012 2013
## 
## $SE_West_C
## [1] 2000 2001 2002 2003 2004 2005 2006 2007 2008

Data preparation

We carry out the same procedure as for seasonality.

#keep only the (series, season) pairs retained above
yelloweel_coastal_subset <- subset(yelloweel_coatal, 
                           mapply(function(season, series){
                             season %in% kept_seasons[[series]]
                           }, yelloweel_coatal$season, yelloweel_coatal$emu_nameshort))


#reshape to one row per series x season, one column per month
yelloweel_coastal_wide <- pivot_wider(data=yelloweel_coastal_subset[, c("emu_nameshort",
                                                     "cou_code",
                                                     "season",
                                                     "das_month",
                                                     "das_value")],
                                names_from="das_month",
                                values_from="das_value")
#prefix month columns with "m" (m1..m12)
names(yelloweel_coastal_wide)[-(1:3)] <- paste0("m",
                                       names(yelloweel_coastal_wide)[-(1:3)])

###we count the number of zeros per line to remove lines without enough
###fishes
data_poor <- data.frame(yelloweel_coastal_wide$emu_nameshort,
                        yelloweel_coastal_wide$season,
                  zero=rowSums(yelloweel_coastal_wide[, -(1:3)] == 0 |
                                 is.na(yelloweel_coastal_wide[, -(1:3)])),
           tot=rowSums(yelloweel_coastal_wide[, -(1:3)], na.rm=TRUE))
#keep rows with fewer than 10 empty months AND more than 50 kg in total.
#The tot > 50 condition mirrors the transitional-water analysis below; it is
#redundant here (seasons under 50 kg were already dropped upstream) but keeps
#the two selections consistent.
yelloweel_coastal_wide <- yelloweel_coastal_wide[data_poor$zero < 10 &
                                                   data_poor$tot > 50, ]

table_datapoor(data_poor %>% filter(zero > 9 | tot<50)) #rows removed: too few informative months or too little catch
“data poor”" situation
EMU season number of zero total catch
ES_Murc_C 2014 10 2623
GB_SouE_C 2013 11 64

It leads to a dataset with 75 rows.

We now replace NA values by zero, since we selected our data series such that missing months correspond to insignificant months / closed months, and we compute proportions per month for each year.

#replace NA by zero (missing months were selected to be insignificant or
#closed), add a tiny constant so no month has an exactly-zero proportion,
#then normalise each row into monthly proportions
m_cols <- paste0("m", 1:12)
zero_fill <- as.list(setNames(rep(0, 12), m_cols))
yelloweel_coastal_wide <- replace_na(yelloweel_coastal_wide,
                                     replace = zero_fill)
yelloweel_coastal_wide[, -(1:3)] <- yelloweel_coastal_wide[, -(1:3)] + 1e-3
total_catch_year <- rowSums(yelloweel_coastal_wide[, m_cols])
yelloweel_coastal_wide <- yelloweel_coastal_wide %>%
  mutate_at(.vars = m_cols, function(x) x / total_catch_year)

The Commission asks us to compare the pattern before and after 2007, probably to see the effect of the Eel Regulation. It is therefore necessary to build a period index. However, since most countries implemented their EMPs only in 2009/2010, we split in 2010.

#period 1 = seasons up to 2009 (pre-EMP), period 2 = 2010 onwards
yelloweel_coastal_wide$period <- ifelse(yelloweel_coastal_wide$season <= 2009,
                                        1,
                                        2)

#number of retained seasons per EMU and period
kable(table(yelloweel_coastal_wide$period,
            yelloweel_coastal_wide$emu_nameshort),
      row.names = TRUE,
      caption = "number of seasons per EMU and period")
number of seasons per EMU and period
DE_Eide_C DE_Schl_C DK_total_MO ES_Murc_C GB_Angl_C GB_SouE_C GB_SouW_C SE_East_C SE_West_C
1 1 1 10 0 0 0 0 9 9
2 9 9 10 1 5 4 5 2 0

The situation is not well balanced. Most EMUs which have data in period 1 don't have data in period 2 and conversely.

Running the model

#one integer group id per EMU x period combination actually present
group <- as.integer(interaction(yelloweel_coastal_wide$emu_nameshort,
                                yelloweel_coastal_wide$period,
                                drop = TRUE))
#number of seasons (rows) per group
nb_occ_group <- table(group)
#observation matrix: one row per season, one column per monthly proportion
y <- as.matrix(yelloweel_coastal_wide[, paste0("m", 1:12)])

Now, we make a loop to select the number of clusters based on a DIC criterion

#compare candidate numbers of clusters (2 to 7) on 3 forked workers
#(FORK cluster: unix only)
cl <- makeCluster(3, 'FORK')
comparison <- parSapply(cl,2:7,
       function(nbclus){
         mydata <- build_data(nbclus)
         adapted <- FALSE
         #JAGS adaptation can fail; in that case the fit is simply restarted
         while (!adapted){
           tryCatch({
             runjags.options(adapt.incomplete="error")
             res <- run.jags("jags_model.txt", monitor= c("deviance",
                                                          "alpha_group",
                                                          "cluster"),
                        summarise=FALSE, adapt=40000, method="parallel",
                        sample=2000,burnin=100000,n.chains=1,
                        inits=generate_init(nbclus, mydata)[[1]],
                        data=mydata)
                        adapted <- TRUE
                        res_mat <- as.matrix(as.mcmc.list(res))
                        #median silhouette over the MCMC sample
                        silhouette <- median(compute_silhouette(res_mat))
                        #number of clusters actually occupied at each iteration
                        nbused <- apply(res_mat, 1, function(iter){
                          length(table(iter[grep("cluster",
                                                 names(iter))]))
                        })
                        #DIC with pD approximated by var(deviance)/2
                        dic <- mean(res_mat[,1])+0.5*var(res_mat[,1])
                        stats <- c(dic,silhouette,mean(nbused))
                  }, error=function(e) {
                    #NOTE(review): `adapted` is set to TRUE right after
                    #run.jags, so an error raised later (e.g. in
                    #compute_silhouette) would exit the while loop with
                    #`stats` undefined — confirm this cannot happen
                    print(paste("not adapted, restarting nbclus",nbclus))
                    }
                  )
         }
         stats
      })
stopCluster(cl)
#one column per candidate number of clusters; rows = DIC, silhouette, used
best_yelloweel_coastal_landings <- data.frame(nbclus=2:(ncol(comparison)+1),
                                              dic=comparison[1, ],
                                              silhouette=comparison[2, ],
                                              used=comparison[3, ])
save(best_yelloweel_coastal_landings, file="yelloweel_coastal_landings_jags.rdata")
load("yelloweel_coastal_landings_jags.rdata")
best_yelloweel_coastal_landings
##   nbclus       dic silhouette   used
## 1      2 -13342.16  0.5919482 2.0000
## 2      3 -13469.98  0.4037846 3.0000
## 3      4 -13446.26  0.3970586 3.0005
## 4      5 -13513.30  0.3042887 4.0025
## 5      6 -13596.51  0.1050584 5.0035
## 6      7 -13573.80  0.1242397 5.0020

While 7 gives the best overall DIC, the DIC is rather flat and the number of cluster used does not evolve much so we stop at 3.

#final fit with the retained number of clusters (3), longer and thinned run
nbclus <- 3
mydata <-build_data(3)
adapted <- FALSE
#restart until JAGS adaptation succeeds (adapt.incomplete="error" turns an
#incomplete adaptation into an error caught below)
while (!adapted){
   tryCatch({
      runjags.options(adapt.incomplete="error")
      #NOTE: "cluster" and "centroid" appear more than once in the monitor
      #list; this is redundant but harmless
      myfit_yelloweel_coastal_landings <- run.jags("jags_model.txt", monitor= c("cluster", "esp", "alpha_group",
                                            "cluster", "centroid",
                                            "centroid_group",
                                            "distToClust", "duration_clus",
                                            "duration_group",
                                            "lambda","id_cluster",
                                            "centroid"),
                      summarise=FALSE, adapt=20000, method="parallel",
                      sample=10000,burnin=200000,n.chains=1, thin=5,
                      inits=generate_init(nbclus, mydata)[[1]], data=mydata)
      adapted <- TRUE
    }, error=function(e) {
       print(paste("not adapted, restarting nbclus",nbclus))
    })
}


save(myfit_yelloweel_coastal_landings, best_yelloweel_coastal_landings,
     file="yelloweel_coastal_landings_jags.rdata")

Results

Once fitted, we can plot monthly pattern per cluster

load("yelloweel_coastal_landings_jags.rdata")
nbclus <- 3
mydata <-build_data(3)
#extract the posterior monthly pattern ("esp" parameters) per cluster and
#reshape it to long format (one row per iteration x cluster x month)
get_pattern_month <- function(res,type="cluster"){
  res_mat <- as.matrix(as.mcmc.list(res, add.mutate=FALSE))
  if (type=="cluster"){
    sub_mat <- as.data.frame(res_mat[,grep("esp",colnames(res_mat))])
  }
  #NOTE(review): sub_mat is only defined when type == "cluster"; any other
  #value of `type` errors on the next line — confirm whether another branch
  #was intended
  sub_mat <- sub_mat %>% 
    pivot_longer(cols=1:ncol(sub_mat),
                 names_to="param",
                 values_to="proportion")
  #parameter names look like "esp[cluster,month]": splitting on punctuation
  #gives the cluster index as 2nd token and the month as 3rd token
  tmp <- lapply(as.character(sub_mat$param),function(p) strsplit(p,"[[:punct:]]"))
  sub_mat$cluster<-as.factor(
    as.integer(lapply(tmp, function(tt) tt[[1]][2])))
  sub_mat$month <- as.character(lapply(tmp,
                                       function(tt) paste("m",
                                                          tt[[1]][3],
                                                          sep="")))
  #order the month factor january..december (m1..m12)
  sub_mat$month <- factor(sub_mat$month, levels=paste("m", 1:12, sep=""))
  sub_mat
}

pat <-get_pattern_month(myfit_yelloweel_coastal_landings)
#relabel clusters for presentation: original cluster 2 -> 1, 3 -> 2, 1 -> 3;
#levels run up to 7 so that colours stay comparable across analyses
pat$cluster <- factor(match(pat$cluster,c("2","3","1")),
                         levels=as.character(1:7))
#posterior distribution of the monthly proportions, one panel per cluster
ggplot(pat,aes(x=month,y=proportion))+
  geom_boxplot(aes(fill=cluster),outlier.shape=NA) +
  scale_fill_manual(values=cols)+facet_wrap(.~cluster, ncol=1) +
  theme_bw()

Cluster 1 peaks in summer. Cluster 2 peaks in winter, and cluster 3 lasts from May to November.

We compute some statistics to characterize the clusters.

table_characteristics(myfit_yelloweel_coastal_landings, 3)
characteristics of clusters
number of months to reach 80% of total
month of centroid
cluster median q2.5% q97.5% median q2.5% q97.5%
1 5 4 6 8.59 8.35 8.83
2 3 2 4 2.54 2.13 2.98
3 6 5 6 7.64 7.55 7.73

Duration indicates the minimum number of months that covers 80% of the wave (1st column is the median, and the 2 next one quantiles 2.5% and 97.5% of credibility intervals). Centroid is the centroid of the migration wave (e.g. 11.5 would indicate a migration centred around mid november). The first column is the median and the two next one the quantiles 2.5 and 97.5%.

We can also look at the belonging of the different groups.

#group labels: one level per EMU x period combination present in the data
groups <- interaction(yelloweel_coastal_wide$emu_nameshort,
                                            yelloweel_coastal_wide$period,
                                            drop=TRUE)
group_name <- levels(groups)
  
#NOTE(review): despite its name (which shadows the plotting helper defined
#earlier), this function builds the cluster-membership table of the groups;
#the `mydata` argument is unused and the function reads `groups`,
#`group_name` and `nbclus` from the calling environment
get_pattern_month <- function(res,mydata){
  

  #group labels are "series.period": split on the dot
  tmp <- strsplit(as.character(group_name),
                  "\\.")
  ser <- as.character(lapply(tmp,function(tt){
    tt[1]
  }))
  period <- as.character(lapply(tmp,function(tt){
    tt[2]
  }))
  res_mat <- as.matrix(as.mcmc.list(res,add.mutate=FALSE))
  
  #for each group: the modal cluster over the MCMC sample, followed by the
  #frequency of each of the nbclus clusters
  clus <- t(sapply(seq_len(length(unique(groups))), function(id){
    name_col <- paste("cluster[",id,"]",sep="")
    freq <- table(res_mat[,name_col])
    max_class <- names(freq)[order(freq,decreasing=TRUE)[1]]
    c(max_class,freq[as.character(1:nbclus)])
  }))
  storage.mode(clus) <- "numeric"
  classes <- as.data.frame(clus)
  names(classes) <- c("cluster", paste("clus",seq_len(nbclus),sep=""))
  cbind.data.frame(data.frame(ser=ser, period=period),
                   classes)
}

myclassif <- get_pattern_month(myfit_yelloweel_coastal_landings)
#apply the presentation relabelling (original cluster 2 -> 1, 3 -> 2, 1 -> 3)
myclassif$cluster <- factor(match(myclassif$cluster,c("2","3","1")),
                         levels=as.character(1:7))

#modal cluster and cluster frequencies (%) per EMU x period
table_classif(myclassif)
EMU period Max cluster % clus 1 % clus 2 % clus 3
ES_Murc_C 2 1 0 100 0
DE_Schl_C 1 2 2 0 98
DE_Schl_C 2 2 0 0 100
DK_total_MO 1 2 0 0 100
DK_total_MO 2 2 0 0 100
GB_Angl_C 2 2 0 0 100
GB_SouE_C 2 2 0 0 100
GB_SouW_C 2 2 0 0 100
SE_East_C 1 2 0 0 100
SE_East_C 2 2 0 0 100
SE_West_C 1 2 0 0 100
DE_Eide_C 1 3 98 0 2
DE_Eide_C 2 3 100 0 0

In fact, nearly all EMUs fall in cluster 3. Cluster 2 corresponds only to ES_Murc and cluster 1 to DE_Eide.

myclassif_p1 <- subset(myclassif, myclassif$period == 1)
myclassif_p2 <- subset(myclassif, myclassif$period == 2)
#map the modal clusters onto the EMU polygons by stripping the habitat suffix
#from the series name ("DE_Eide_C" -> "DE_Eide"). sub() removes the LAST
#"_xxx" chunk whatever its length, so the two-letter "MO" habitat is handled
#too; the previous substr(., 1, nchar - 2) left "DK_total_" for
#"DK_total_MO" and silently failed to match that EMU.
emu$cluster1 <- factor(myclassif_p1$cluster[match(emu$name_short,
                                                  sub("_[^_]+$", "", as.character(myclassif_p1$ser)))],
                       levels=1:7)
emu$cluster2 <- factor(myclassif_p2$cluster[match(emu$name_short,
                                                sub("_[^_]+$", "", as.character(myclassif_p2$ser)))],
                       levels=1:7)
#period-1 map
ggplot(data = cou) +  geom_sf(fill= "antiquewhite") +
        geom_sf(data=emu,aes(fill=cluster1)) + scale_fill_manual(values=cols)+
  theme_bw() +xlim(-20,30) + ylim(35,65) 

#period-2 map
ggplot(data = cou) +  geom_sf(fill= "antiquewhite") +
        geom_sf(data=emu,aes(fill=cluster2)) + scale_fill_manual(values=cols)+
  theme_bw() +xlim(-20,30) + ylim(35,65)  

Exporting pattern per group

#posterior sample as a plain matrix; column names are reused below
tmp <- as.matrix(as.mcmc.list(myfit_yelloweel_coastal_landings))
name_col <- colnames(tmp)

#median monthly pattern for every EMU x period group, tagged as yellow-eel
#coastal landings
pattern_Ycoast_landings <- do.call(
  "rbind.data.frame",
  lapply(seq_along(levels(groups)), function(g) {
    median_pattern_group(g, group_name, tmp, "Y", "landings", hty_code = "C")
  })
)

save(pattern_Ycoast_landings, file = "pattern_Ycoast_landings.rdata")

Similarity before and after 2010

#which EMUs have data in both periods
occ <- table(unique(yelloweel_coastal_wide[,c("emu_nameshort", "period")])[,1])
tocompare <- names(occ)[which(occ>1)]

#posterior similarity between the two period-specific monthly patterns of an
#EMU: the overlap sum(min(p1, p2)) of the two 12-month composition vectors,
#computed at each MCMC iteration (1 = identical patterns, 0 = disjoint).
#vapply with a named template keeps the "2.5%"/"50%"/"97.5%" row names that
#downstream code relies on, while guaranteeing a typed result even if
#tocompare were empty; fixed=TRUE makes the series-name grep literal.
simi <- vapply(tocompare, function(s){
  g <- grep(s, group_name, fixed=TRUE)
  esp1 <- tmp[,grep(paste("alpha_group\\[",g[1],",",sep=""),name_col)]
  esp2 <- tmp[,grep(paste("alpha_group\\[",g[2],",",sep=""),name_col)]
  quantile(apply(cbind(esp1,esp2),
                 1,
                 function(x) sum(pmin(x[1:12],x[13:24]))),
           probs=c(0.025,.5,.975))
}, setNames(numeric(3), c("2.5%","50%","97.5%")))

similarity <- data.frame(emu=tocompare,t(simi))

table_similarity(similarity)
similarity
EMU q2.5% median q97.5%
DE_Eide_C 0.60 0.75 0.87
DE_Schl_C 0.61 0.75 0.86
DK_total_MO 0.78 0.85 0.91
SE_East_C 0.66 0.78 0.88

Potential effect of EMP and EU closures

#recover metadata from the group labels "EMU_name_HTY.period"
#(e.g. "DE_Eide_C.1": EMU "DE_Eide", habitat "C", period 1)
ncar <- nchar(group_name)
#the period is the last character of the label
period <- as.integer(substr(as.character(group_name), ncar, ncar))
blocks <- strsplit(group_name, "_")
#EMU short name = first two underscore-separated chunks
#(vapply rather than sapply: typed results even on empty input)
emus <- vapply(blocks, function(x) paste(x[1], x[2], sep="_"), character(1))
#habitat = third chunk without the trailing ".period" (2 characters)
hty_code <- vapply(blocks, function(x) substr(x[3], 1, nchar(x[3]) - 2),
                   character(1))



#######EMP
#groups observed in period 1 (pre-EMP): candidates to quantify the share of
#their historical seasonal pattern that an EMP closure would have removed
list_period1 <- data.frame(emu_nameshort=emus[period==1])
list_period1$group <- group_name[period==1]
list_period1$id_g <- match(list_period1$group,group_name)
list_period1$hty_code <- hty_code[period==1]
  
#we check that we have landings data for this stage/habitat in the EMP
#closure table...
list_period1$estimable <- mapply(function(s,hty) {
  length(which(charac_EMP_closures$emu_nameshort==s 
               & grepl("Y",charac_EMP_closures$lfs_code) 
               & grepl(hty, charac_EMP_closures$hty_code)))>0},
  list_period1$emu_nameshort, list_period1$hty_code)

#...and that the series starts at least two years before the first closure
list_period1$estimable <- list_period1$estimable &
(sapply(list_period1$id_g,function(e) min(yelloweel_coastal_wide$season[group==e]))+2 <
mapply(function(e,hty) min(charac_EMP_closures$year[charac_EMP_closures$emu_nameshort==e &
                                                           grepl("Y",charac_EMP_closures$lfs_code) &
                                                    grepl(hty,charac_EMP_closures$hty_code)]),
       list_period1$emu_nameshort, list_period1$hty_code))

#quantiles of the potential loss, filled only for estimable groups
list_period1$lossq2.5 <- NA
list_period1$lossq50 <- NA
list_period1$lossq97.5 <- NA

#for each estimable group: share of the annual pattern falling in closed
#months, weighted by the closure percentage, over the posterior sample
res_closures <- mapply(function(s,g,hty) {
  emu_closures <- EMP_closures %>%
    filter(emu_nameshort==s & grepl("Y",lfs_code) & grepl(hty, hty_code)) %>%
    group_by(emu_nameshort,month) %>%
    summarize(fishery_closure_percent=max(fishery_closure_percent))
  myalpha <- tmp[,paste("alpha_group[",g,",",emu_closures$month,"]",sep="")]
  if (nrow(emu_closures)>1){
    loss <- colSums(apply(myalpha,1,function(x) x*emu_closures$fishery_closure_percent/100))
  } else {
    loss <- myalpha*emu_closures$fishery_closure_percent/100
  }
  quantile(loss,probs=c(0.025,.5,.975))
},as.character(list_period1$emu_nameshort[list_period1$estimable]),
list_period1$id_g[list_period1$estimable],
#full column name: the original `$hty` silently relied on partial matching
list_period1$hty_code[list_period1$estimable])

list_period1[list_period1$estimable, c("lossq2.5", "lossq50","lossq97.5")] <-
  t(res_closures)

kable(list_period1[,c("emu_nameshort","lossq2.5","lossq50","lossq97.5")],
      col.names=c("emu","q2.5","median","q97.5"),
      caption="proportion of catch potentially lost because of EMP closure",
      digits=2)
proportion of catch potentially lost because of EMP closure
emu q2.5 median q97.5
DE_Eide NA NA NA
DE_Schl NA NA NA
DK_total NA NA NA
SE_East NA NA NA
SE_West NA NA NA
#######EU
#groups observed in period 2: used to quantify the share of catches
#potentially lost to the EU-level closures
list_period2=data.frame(emu_nameshort=emus[period==2])
list_period2$group=group_name[period==2]
list_period2$id_g=match(list_period2$group,group_name)
list_period2$hty_code=hty_code[period==2]
  
#we check that we have landings data at least two years before the first EU closures
list_period2$estimable=mapply(function(s,hty) {
  length(which(charac_EU_closures$emu_nameshort==s 
               & grepl("Y",charac_EU_closures$lfs_code) 
               & grepl(hty, charac_EU_closures$hty_code)))>0},
  list_period2$emu_nameshort, list_period2$hty_code)

#...and that the series starts at least two years before the first closure
list_period2$estimable=list_period2$estimable &
(sapply(list_period2$id_g,function(e) min(yelloweel_coastal_wide$season[group==e]))+2 <
mapply(function(e,hty) min(charac_EU_closures$year[charac_EU_closures$emu_nameshort==e &
                                                           grepl("Y",charac_EU_closures$lfs_code) &
                                                    grepl(hty,charac_EU_closures$hty_code)]),
       list_period2$emu_nameshort, list_period2$hty_code))

#quantiles of the potential loss, filled only for estimable groups
list_period2$lossq2.5=NA
list_period2$lossq50=NA
list_period2$lossq97.5=NA

#for each estimable group: share of the annual pattern falling in closed
#months, weighted by the closure percentage, over the posterior sample
res_closures=mapply(function(s,g,hty) {
  emu_closures <- EU_closures %>%
    filter(emu_nameshort==s & grepl("Y", lfs_code) & grepl(hty,hty_code)) %>%
    group_by(emu_nameshort,month) %>%
    summarize(fishery_closure_percent=max(fishery_closure_percent))
  myalpha=tmp[,paste("alpha_group[",g,",",emu_closures$month,"]",sep="")]
  if (nrow(emu_closures)>1){
    loss=colSums(apply(myalpha,1,function(x) x*emu_closures$fishery_closure_percent/100))
  } else {
    loss=myalpha*emu_closures$fishery_closure_percent/100
  }
  quantile(loss,probs=c(0.025,.5,.975))
},as.character(list_period2$emu_nameshort[list_period2$estimable]),
list_period2$id_g[list_period2$estimable],
list_period2$hty_code[list_period2$estimable])

list_period2[list_period2$estimable, c("lossq2.5", "lossq50","lossq97.5")] =
  t(res_closures)

kable(list_period2[,c("emu_nameshort","lossq2.5","lossq50","lossq97.5")],
      col.names=c("emu","q2.5","median","q97.5"),
      caption="proportion of catch potentially lost because of EU closure",
      digits=2)
proportion of catch potentially lost because of EU closure
emu q2.5 median q97.5
DE_Eide 0.01 0.02 0.03
DE_Schl 0.02 0.03 0.04
DK_total 0.07 0.10 0.14
ES_Murc NA NA NA
GB_Angl 0.00 0.01 0.02
GB_SouE 0.00 0.01 0.02
GB_SouW 0.00 0.01 0.01
SE_East 0.02 0.06 0.12
#stack the EMP (period-1) and EU (period-2) loss tables and save them for
#the cross-stage synthesis
list_period1$type <- "EMP closure"
list_period2$type <- "EU closure"
list_period <- rbind.data.frame(list_period1, list_period2)
list_period$stage <- "Y"
save(list_period, file = "loss_yellowcoastal.rdata")

Transitional waters

Data selection

Now we should carry out data selection; more specifically, we want to eliminate rows with too many missing data or too many zeroes, and to check whether there are any duplicates (though Cedric already did it).

#transitional-water (T) series only
yelloweel_transitional <- subset(yelloweel, yelloweel$hty_code =="T")
#for each series, keep the seasons with good monthly coverage
#(good_coverage_wave) and a non-trivial amount of landings
kept_seasons <- lapply(unique(yelloweel_transitional$emu_nameshort), function(s){
  sub_yellow <- subset(yelloweel_transitional, yelloweel_transitional$emu_nameshort==s)
  kept <- good_coverage_wave(sub_yellow)
  #we remove seasons in which we have less than 50 kg of landings
  #(vapply rather than sapply: keeps a typed logical result even when
  #`kept` is empty, where sapply would return an empty list)
  if(!is.null(kept))
    kept <- kept[vapply(kept, function(k)
      sum(sub_yellow$das_value[sub_yellow$season==k], na.rm=TRUE) > 50,
      logical(1))]
  if (length(kept) == 0) kept <- NULL
  kept
})
## [1] "For  DE_Eide_T  a good season should cover months: 4 to 11"
## [1] "For  DE_Elbe_T  a good season should cover months: 4 to 11"
## [1] "For  FR_Adou_T  a good season should cover months: 4 to 8"
## [1] "For  FR_Arto_T  a good season should cover months: 6 to 11"
## [1] "For  FR_Bret_T  a good season should cover months: 3 to 9"
## [1] "For  FR_Cors_T  a good season should cover months: 3 to 11"
## [1] "For  FR_Garo_T  a good season should cover months: 4 to 11"
## [1] "For  FR_Loir_T  a good season should cover months: 5 to 11"
## [1] "For  FR_Sein_T  a good season should cover months: 4 to 12"
## [1] "For GB_Dee_T not possible to define a season"
## [1] "For  NO_total_T  a good season should cover months: 5 to 11"

Finally, here are the series kept given previous criterion.

#label each element with its series name, then display only the series that
#kept at least one season (vapply: typed result, safe on empty lists)
names(kept_seasons) <- unique(yelloweel_transitional$emu_nameshort)
kept_seasons[!vapply(kept_seasons, is.null, logical(1))]
## $DE_Eide_T
##  [1] 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018
## 
## $DE_Elbe_T
##  [1] 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018
## 
## $FR_Adou_T
## [1] 2009 2011 2013 2014 2015 2016 2018
## 
## $FR_Arto_T
## [1] 2009
## 
## $FR_Bret_T
##  [1] 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018
## 
## $FR_Cors_T
## [1] 2010 2011 2012 2013 2014 2015 2016 2017 2018
## 
## $FR_Garo_T
##  [1] 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018
## 
## $FR_Loir_T
##  [1] 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018
## 
## $FR_Sein_T
## [1] 2009 2010 2015
## 
## $NO_total_T
## [1] 2001

Data preparation

We carry out the same procedure as for seasonality.

#keep only the (series, season) pairs retained above
yelloweel_transitional_subset <- subset(
  yelloweel_transitional,
  mapply(function(season, series) season %in% kept_seasons[[series]],
         yelloweel_transitional$season,
         yelloweel_transitional$emu_nameshort))


#reshape to one row per series x season, one column per month
yelloweel_transitional_wide <- pivot_wider(
  data = yelloweel_transitional_subset[, c("emu_nameshort",
                                           "cou_code",
                                           "season",
                                           "das_month",
                                           "das_value")],
  names_from = "das_month",
  values_from = "das_value")
names(yelloweel_transitional_wide)[-(1:3)] <-
  paste0("m", names(yelloweel_transitional_wide)[-(1:3)])

###count the zero/missing months and the total catch per row, to discard
###rows without enough fish
month_block <- yelloweel_transitional_wide[, -(1:3)]
data_poor <- data.frame(yelloweel_transitional_wide$emu_nameshort,
                        yelloweel_transitional_wide$season,
                        zero = rowSums(month_block == 0 | is.na(month_block)),
                        tot = rowSums(month_block, na.rm = TRUE))
yelloweel_transitional_wide <-
  yelloweel_transitional_wide[data_poor$zero < 10 & data_poor$tot > 50, ]

#rows removed: too few informative months or too little total catch
table_datapoor(data_poor %>% filter(zero > 9 | tot<50))
“data poor”" situation
EMU season number of zero total catch
FR_Adou_T 2013 11 294
FR_Arto_T 2009 11 330
FR_Sein_T 2015 10 475

It leads to a dataset with 68 rows.

We now replace NA values by zero, since we selected our data series such that missing months correspond to insignificant months / closed months, and we compute proportions per month for each year.

#replace NA by zero (missing months were selected to be insignificant or
#closed), add a tiny constant so no month has an exactly-zero proportion,
#then normalise each row into monthly proportions
m_cols <- paste0("m", 1:12)
zero_fill <- as.list(setNames(rep(0, 12), m_cols))
yelloweel_transitional_wide <- replace_na(yelloweel_transitional_wide,
                                          replace = zero_fill)
yelloweel_transitional_wide[, -(1:3)] <-
  yelloweel_transitional_wide[, -(1:3)] + 1e-3
total_catch_year <- rowSums(yelloweel_transitional_wide[, m_cols])
yelloweel_transitional_wide <- yelloweel_transitional_wide %>%
  mutate_at(.vars = m_cols, function(x) x / total_catch_year)

The Commission asks us to compare the pattern before and after 2007, probably to see the effect of the Eel Regulation. It is therefore necessary to build a period index. However, since most countries implemented their EMPs only in 2009/2010, we split in 2010.

#period 1 = seasons up to 2009 (pre-EMP), period 2 = 2010 onwards
yelloweel_transitional_wide$period <- ifelse(yelloweel_transitional_wide$season <= 2009,
                                             1,
                                             2)

#number of retained seasons per EMU and period
table(yelloweel_transitional_wide$period,
      yelloweel_transitional_wide$emu_nameshort)
##    
##     DE_Eide_T DE_Elbe_T FR_Adou_T FR_Bret_T FR_Cors_T FR_Garo_T FR_Loir_T
##   1         1         1         1         1         0         1         1
##   2         9         9         5         9         9         9         9
##    
##     FR_Sein_T NO_total_T
##   1         1          1
##   2         1          0

The situation is not well balanced: most EMUs only have data in period 2.

Running the model

#one integer group id per EMU x period combination actually present
group <- as.integer(interaction(yelloweel_transitional_wide$emu_nameshort,
                                yelloweel_transitional_wide$period,
                                drop = TRUE))
#number of seasons (rows) per group
nb_occ_group <- table(group)
#observation matrix: one row per season, one column per monthly proportion
y <- as.matrix(yelloweel_transitional_wide[, paste0("m", 1:12)])

Now, we make a loop to select the number of clusters based on a DIC criterion

#compare candidate numbers of clusters (2 to 7) on 3 forked workers
#(FORK cluster: unix only)
cl <- makeCluster(3, 'FORK')
comparison <- parSapply(cl, 2:7,
       function(nbclus){
         mydata <- build_data(nbclus)
         adapted <- FALSE
         #JAGS adaptation can fail; in that case the fit is simply restarted
         while (!adapted){
           tryCatch({
             runjags.options(adapt.incomplete="error")
             res <- run.jags("jags_model.txt", monitor= c("deviance",
                                                          "alpha_group",
                                                          "cluster"),
                        summarise=FALSE, adapt=40000, method="parallel",
                        sample=2000,burnin=100000,n.chains=1,
                        inits=generate_init(nbclus, mydata)[[1]],
                        data=mydata)
                        adapted <- TRUE
                        res_mat <- as.matrix(as.mcmc.list(res))
                        #median silhouette over the MCMC sample
                        silhouette <- median(compute_silhouette(res_mat))
                        #number of clusters actually occupied at each iteration
                        nbused <- apply(res_mat, 1, function(iter){
                          length(table(iter[grep("cluster",
                                                 names(iter))]))
                        })
                        #DIC with pD approximated by var(deviance)/2
                        dic <- mean(res_mat[,1])+0.5*var(res_mat[,1])
                        stats <- c(dic,silhouette,mean(nbused))
                  }, error=function(e) {
                    #NOTE(review): `adapted` is set to TRUE right after
                    #run.jags, so an error raised later (e.g. in
                    #compute_silhouette) would exit the while loop with
                    #`stats` undefined — confirm this cannot happen
                    print(paste("not adapted, restarting nbclus",nbclus))
                    }
                  )
         }
         stats
      })
stopCluster(cl)
#one column per candidate number of clusters; rows = DIC, silhouette, nbused
best_yelloweel_transitional_landings <- data.frame(nbclus=2:(ncol(comparison)+1),
                                              dic=comparison[1, ],
                                              silhouette=comparison[2, ],
                                              nbused=comparison[3,])
save(best_yelloweel_transitional_landings, file="yelloweel_transitional_landings_jags.rdata")
load("yelloweel_transitional_landings_jags.rdata")
best_yelloweel_transitional_landings
##   nbclus       dic silhouette nbused
## 1      2 -15909.74 0.20090660      2
## 2      3 -15863.20 0.05489090      3
## 3      4 -16252.08 0.11615278      4
## 4      5 -16456.02 0.08891747      5
## 5      6 -16581.52 0.08898468      6
## 6      7 -16559.32 0.07625377      6

4 appears to be a good solution: good silhouette and we have only 4 groups.

#final fit with the retained number of clusters (4), longer and thinned run
nbclus <- 4
mydata <-build_data(4)
adapted <- FALSE
#restart until JAGS adaptation succeeds (adapt.incomplete="error" turns an
#incomplete adaptation into an error caught below)
while (!adapted){
   tryCatch({
      runjags.options(adapt.incomplete="error")
      #NOTE: "cluster" and "centroid" appear more than once in the monitor
      #list; this is redundant but harmless
      myfit_yelloweel_transitional_landings <- run.jags("jags_model.txt", monitor= c("cluster", "esp", "alpha_group",
                                            "cluster", "centroid",
                                            "centroid_group",
                                            "distToClust", "duration_clus",
                                            "duration_group",
                                            "lambda","id_cluster",
                                            "centroid"),
                      summarise=FALSE, adapt=20000, method="parallel",
                      sample=10000,burnin=200000,n.chains=1, thin=5,
                      inits=generate_init(nbclus, mydata)[[1]], data=mydata)
      adapted <- TRUE
    }, error=function(e) {
       print(paste("not adapted, restarting nbclus",nbclus))
    })
}


save(myfit_yelloweel_transitional_landings, best_yelloweel_transitional_landings,
     file="yelloweel_transitional_landings_jags.rdata")

Results

Once fitted, we can plot monthly pattern per cluster

load("yelloweel_transitional_landings_jags.rdata")
nbclus <- 4
mydata <-build_data(4)
#extract the posterior monthly pattern ("esp" parameters) per cluster and
#reshape it to long format (one row per iteration x cluster x month)
get_pattern_month <- function(res,type="cluster"){
  res_mat <- as.matrix(as.mcmc.list(res, add.mutate=FALSE))
  if (type=="cluster"){
    sub_mat <- as.data.frame(res_mat[,grep("esp",colnames(res_mat))])
  }
  #NOTE(review): sub_mat is only defined when type == "cluster"; any other
  #value of `type` errors on the next line — confirm whether another branch
  #was intended
  sub_mat <- sub_mat %>% 
    pivot_longer(cols=1:ncol(sub_mat),
                 names_to="param",
                 values_to="proportion")
  #parameter names look like "esp[cluster,month]": splitting on punctuation
  #gives the cluster index as 2nd token and the month as 3rd token
  tmp <- lapply(as.character(sub_mat$param),function(p) strsplit(p,"[[:punct:]]"))
  sub_mat$cluster<-as.factor(
    as.integer(lapply(tmp, function(tt) tt[[1]][2])))
  sub_mat$month <- as.character(lapply(tmp,
                                       function(tt) paste("m",
                                                          tt[[1]][3],
                                                          sep="")))
  #order the month factor january..december (m1..m12)
  sub_mat$month <- factor(sub_mat$month, levels=paste("m", 1:12, sep=""))
  sub_mat
}

pat <-get_pattern_month(myfit_yelloweel_transitional_landings)
#relabel clusters for presentation: original cluster 3 -> 1, 2 -> 2,
#4 -> 3, 1 -> 4; levels run up to 7 so colours stay comparable across analyses
pat$cluster <- factor(match(pat$cluster,c("3", "2","4","1")),
                      levels=as.character(1:7))
#posterior distribution of the monthly proportions, one panel per cluster
ggplot(pat,aes(x=month,y=proportion))+
  geom_boxplot(aes(fill=cluster),outlier.shape=NA) +
  scale_fill_manual(values=cols)+facet_wrap(.~cluster, ncol=1)+
  theme_bw()

There is much more diversity than in coastal waters. Some clusters peak in spring (3), summer (2), autumn (1) and one has two peaks (4).

We compute some statistics to characterize the clusters.

table_characteristics(myfit_yelloweel_transitional_landings, 4)
characteristics of clusters
number of months to reach 80% of total
month of centroid
cluster median q2.5% q97.5% median q2.5% q97.5%
1 5 4 5 11.35 11.05 11.68
2 3 2 3 7.59 7.29 7.91
3 4 3 4 5.75 5.64 5.85
4 6 5 6 8.32 8.16 8.47

Duration indicates the minimum number of months that covers 80% of the wave (the first column is the median, the next two the 2.5% and 97.5% quantiles of the credibility interval). Centroid is the centroid of the migration wave (e.g. 11.5 would indicate a migration centred around mid-November); again, the first column is the median and the next two the 2.5% and 97.5% quantiles.

We can also look at the belonging of the different groups.

# Group labels "EMU.period", in the same order as the model's group index
# (used below to map cluster[] nodes back to EMU and period).
groups <- interaction(yelloweel_transitional_wide$emu_nameshort,
                                            yelloweel_transitional_wide$period,
                                            drop=TRUE)
group_name <- levels(groups)

# NOTE(review): despite its name, this function returns the cluster
# membership of each EMU x period group, not a monthly pattern (and it
# overwrites the get_pattern_month defined earlier). The `mydata` argument
# is unused; the function relies on the globals `group_name`, `groups`
# and `nbclus`.
get_pattern_month <- function(res,mydata){
  
  # group names are "EMU.period": split into series name and period
  tmp <- strsplit(as.character(group_name),
                  "\\.")
  ser <- as.character(lapply(tmp,function(tt){
    tt[1]
  }))
  period <- as.character(lapply(tmp,function(tt){
    tt[2]
  }))
  res_mat <- as.matrix(as.mcmc.list(res,add.mutate=FALSE))
  
  # for each group, tabulate the sampled cluster labels over the MCMC
  # iterations: the modal label is the assigned cluster, and the counts
  # give the membership frequency per cluster
  clus <- t(sapply(seq_len(length(unique(groups))), function(id){
    name_col <- paste("cluster[",id,"]",sep="")
    freq <- table(res_mat[,name_col])
    max_class <- names(freq)[order(freq,decreasing=TRUE)[1]]
    c(max_class,freq[as.character(1:nbclus)])
  }))
  storage.mode(clus) <- "numeric"
  classes <- as.data.frame(clus)
  names(classes) <- c("cluster", paste("clus",seq_len(nbclus),sep=""))
  cbind.data.frame(data.frame(ser=ser, period=period),
                   classes)
}

# Cluster membership per group, with clusters relabelled consistently with
# the pattern plot above (JAGS labels 3,2,4,1 -> 1,2,3,4).
myclassif <- get_pattern_month(myfit_yelloweel_transitional_landings)
myclassif$cluster <- factor(match(myclassif$cluster,c("3", "2","4","1")),
                      levels=as.character(1:7))

table_classif(myclassif)
EMU period Max cluster % clus 1 % clus 2 % clus 3 % clus 4
FR_Adou_T 1 1 0 0 100 0
FR_Adou_T 2 1 0 0 100 0
FR_Bret_T 2 1 0 0 100 0
FR_Garo_T 2 1 0 0 100 0
FR_Sein_T 1 1 0 0 100 0
FR_Bret_T 1 2 0 96 2 2
FR_Sein_T 2 2 0 100 0 0
DE_Eide_T 1 3 0 0 0 100
DE_Eide_T 2 3 0 0 0 100
DE_Elbe_T 1 3 0 0 0 100
DE_Elbe_T 2 3 0 0 0 100
FR_Garo_T 1 3 1 0 0 99
FR_Loir_T 1 3 1 0 13 87
FR_Loir_T 2 3 0 0 0 100
NO_total_T 1 3 0 0 0 100
FR_Cors_T 2 4 100 0 0 0

Cluster 4 stands only for Corsica. Some French EMUs have changed clusters after 2010 towards cluster 1 which has a small duration.

# Map the cluster of each EMU for each period. The substr() strips the
# trailing habitat suffix (e.g. "_T") from the series name so it matches
# emu$name_short in the spatial layer.
myclassif_p1 <- subset(myclassif, myclassif$period == 1)
myclassif_p2 <- subset(myclassif, myclassif$period == 2)
emu$cluster1 <- factor(myclassif_p1$cluster[match(emu$name_short,
                                                  substr(myclassif_p1$ser,1,nchar(as.character(myclassif_p1$ser))-2))],
                       levels=1:7)
emu$cluster2 <- factor(myclassif_p2$cluster[match(emu$name_short,
                                                substr(myclassif_p2$ser,1,nchar(as.character(myclassif_p2$ser))-2))],
                       levels=1:7)
# period 1 (before 2010)
ggplot(data = cou) +  geom_sf(fill= "antiquewhite") +
        geom_sf(data=emu,aes(fill=cluster1)) + scale_fill_manual(values=cols)+
  theme_bw() +xlim(-20,30) + ylim(35,65) 

# period 2 (after 2010)
ggplot(data = cou) +  geom_sf(fill= "antiquewhite") +
        geom_sf(data=emu,aes(fill=cluster2)) + scale_fill_manual(values=cols)+
  theme_bw() +xlim(-20,30) + ylim(35,65)  

Exporting pattern per group

# Export the median monthly pattern per group for reuse elsewhere.
tmp <- as.matrix(as.mcmc.list(myfit_yelloweel_transitional_landings))
name_col = colnames(tmp)

pattern_Ytrans_landings=do.call("rbind.data.frame",
                                lapply(seq_len(length(levels(groups))), function(g)
                                   median_pattern_group(g, group_name,tmp, "Y","landings", hty_code="T")))
save(pattern_Ytrans_landings,file="pattern_Ytrans_landings.rdata")

Similarity before and after 2010

#which groups have data in both periods
occ=table(unique(yelloweel_transitional_wide[,c("emu_nameshort", "period")])[,1])
tocompare=names(occ)[which(occ>1)]

# Similarity = posterior distribution of sum_m min(pattern_p1[m],
# pattern_p2[m]), i.e. the overlap of the two monthly patterns (1 =
# identical patterns, 0 = disjoint).
# NOTE(review): grep(s, group_name) assumes exactly two matches per EMU.
simi=sapply(tocompare, function(s){
  g=grep(s,group_name)
  esp1=tmp[,grep(paste("alpha_group\\[",g[1],",",sep=""),name_col)]
  esp2=tmp[,grep(paste("alpha_group\\[",g[2],",",sep=""),name_col)]
  quantile(apply(cbind(esp1,esp2),
                 1,
                 function(x) sum(pmin(x[1:12],x[13:24]))),
           probs=c(0.025,.5,.975))
})

similarity=data.frame(emu=tocompare,t(simi))

table_similarity(similarity)
similarity
EMU q2.5% median q97.5%
DE_Eide_T 0.49 0.66 0.81
DE_Elbe_T 0.62 0.76 0.88
FR_Adou_T 0.54 0.72 0.87
FR_Bret_T 0.42 0.58 0.73
FR_Garo_T 0.44 0.60 0.75
FR_Loir_T 0.49 0.66 0.80
FR_Sein_T 0.09 0.14 0.21

Potential effect of EMP and EU closures

# Decompose group names "CC_Emu_H.p" into the period (last character),
# the EMU name ("CC_Emu") and the habitat code (the part before ".p").
ncar=nchar(group_name)
period=as.integer(substr(as.character(group_name),ncar,ncar))
blocks=strsplit(group_name,"_")
emus=sapply(blocks,function(x)paste(x[1],x[2],sep="_"))
hty_code=sapply(blocks,function(x) substr(x[3],1,nchar(x[3])-2))



#######EMP
# Period-1 (pre-2010) groups: candidates for estimating the potential
# landings loss due to EMP closures.
list_period1=data.frame(emu_nameshort=emus[period==1])
list_period1$group=group_name[period==1]
list_period1$id_g=match(list_period1$group,group_name)
list_period1$hty_code=hty_code[period==1]
  
#we check that we have landings data at least two years before the first EMP closures
list_period1$estimable=mapply(function(s,hty) {
  length(which(charac_EMP_closures$emu_nameshort==s 
               & grepl("Y",charac_EMP_closures$lfs_code) 
               & grepl(hty, charac_EMP_closures$hty_code)))>0},
  list_period1$emu_nameshort, list_period1$hty_code)

# ...and that the series starts at least two years before the first
# closure. NOTE(review): `group` is the global group index from the model
# section.
list_period1$estimable=list_period1$estimable &
(sapply(list_period1$id_g,function(e) min(yelloweel_transitional_wide$season[group==e]))+2 <
mapply(function(e,hty) min(charac_EMP_closures$year[charac_EMP_closures$emu_nameshort==e &
                                                           grepl("Y",charac_EMP_closures$lfs_code) &
                                                    grepl(hty,charac_EMP_closures$hty_code)]),
       list_period1$emu_nameshort, list_period1$hty_code))

# placeholders for the quantiles of the estimated loss
list_period1$lossq2.5=NA
list_period1$lossq50=NA
list_period1$lossq97.5=NA

# For each estimable group, estimate the share of annual landings that
# falls inside EMP closure months: the strongest closure percentage per
# month times the posterior monthly pattern, summed over closed months.
# Fix: the last mapply argument used `list_period1$hty`, which only worked
# through data.frame partial matching of `hty_code`; spell it out.
res_closures=mapply(function(s,g,hty) {
  # strongest closure per month for this EMU / stage / habitat
  emu_closures <- EMP_closures %>%
    filter(emu_nameshort==s & grepl("Y",lfs_code) & grepl(hty, hty_code)) %>%
    group_by(emu_nameshort,month) %>%
    summarize(fishery_closure_percent=max(fishery_closure_percent))
  # posterior samples of the monthly pattern for the closed months
  myalpha=tmp[,paste("alpha_group[",g,",",emu_closures$month,"]",sep="")]
  if (nrow(emu_closures)>1){
    loss=colSums(apply(myalpha,1,function(x) x*emu_closures$fishery_closure_percent/100))
  } else {
    # a single closed month: myalpha is already a vector of samples
    loss=myalpha*emu_closures$fishery_closure_percent/100
  }
  quantile(loss,probs=c(0.025,.5,.975))
},as.character(list_period1$emu_nameshort[list_period1$estimable]),
list_period1$id_g[list_period1$estimable],
list_period1$hty_code[list_period1$estimable])

list_period1[list_period1$estimable, c("lossq2.5", "lossq50","lossq97.5")] =
  t(res_closures)

kable(list_period1[,c("emu_nameshort","lossq2.5","lossq50","lossq97.5")],
      col.names=c("emu","q2.5","median","q97.5"),
      caption="proportion of catch potentially lost because of EMP closure",
      digits=2)
proportion of catch potentially lost because of EMP closure
emu q2.5 median q97.5
DE_Eide NA NA NA
DE_Elbe NA NA NA
FR_Adou NA NA NA
FR_Bret NA NA NA
FR_Garo NA NA NA
FR_Loir NA NA NA
FR_Sein NA NA NA
NO_total NA NA NA
#######EU
# Period-2 (post-2010) groups: candidates for estimating the potential
# landings loss due to the EU closures.
list_period2=data.frame(emu_nameshort=emus[period==2])
list_period2$group=group_name[period==2]
list_period2$id_g=match(list_period2$group,group_name)
list_period2$hty_code=hty_code[period==2]
  
#we check that we have landings data at least two years before the first EU closures
list_period2$estimable=mapply(function(s,hty) {
  length(which(charac_EU_closures$emu_nameshort==s 
               & grepl("Y",charac_EU_closures$lfs_code) 
               & grepl(hty, charac_EU_closures$hty_code)))>0},
  list_period2$emu_nameshort, list_period2$hty_code)

# ...and that the series starts at least two years before the first closure
list_period2$estimable=list_period2$estimable &
(sapply(list_period2$id_g,function(e) min(yelloweel_transitional_wide$season[group==e]))+2 <
mapply(function(e,hty) min(charac_EU_closures$year[charac_EU_closures$emu_nameshort==e &
                                                           grepl("Y",charac_EU_closures$lfs_code) &
                                                    grepl(hty,charac_EU_closures$hty_code)]),
       list_period2$emu_nameshort, list_period2$hty_code))

# placeholders for the quantiles of the estimated loss
list_period2$lossq2.5=NA
list_period2$lossq50=NA
list_period2$lossq97.5=NA

# Same computation as for EMP closures, applied to the EU closures and the
# period-2 groups.
res_closures=mapply(function(s,g,hty) {
  # strongest closure per month for this EMU / stage / habitat
  emu_closures <- EU_closures %>%
    filter(emu_nameshort==s & grepl("Y", lfs_code) & grepl(hty,hty_code)) %>%
    group_by(emu_nameshort,month) %>%
    summarize(fishery_closure_percent=max(fishery_closure_percent))
  # posterior samples of the monthly pattern for the closed months
  myalpha=tmp[,paste("alpha_group[",g,",",emu_closures$month,"]",sep="")]
  if (nrow(emu_closures)>1){
    loss=colSums(apply(myalpha,1,function(x) x*emu_closures$fishery_closure_percent/100))
  } else {
    loss=myalpha*emu_closures$fishery_closure_percent/100
  }
  quantile(loss,probs=c(0.025,.5,.975))
},as.character(list_period2$emu_nameshort[list_period2$estimable]),
list_period2$id_g[list_period2$estimable],
list_period2$hty_code[list_period2$estimable])

list_period2[list_period2$estimable, c("lossq2.5", "lossq50","lossq97.5")] =
  t(res_closures)

kable(list_period2[,c("emu_nameshort","lossq2.5","lossq50","lossq97.5")],
      col.names=c("emu","q2.5","median","q97.5"),
      caption="proportion of catch potentially lost because of EU closure",
      digits=2)
proportion of catch potentially lost because of EU closure
emu q2.5 median q97.5
DE_Eide NA NA NA
DE_Elbe 0.03 0.05 0.07
FR_Adou NA NA NA
FR_Bret NA NA NA
FR_Cors 0.05 0.08 0.12
FR_Garo NA NA NA
FR_Loir NA NA NA
FR_Sein NA NA NA
# Combine the EMP and EU closure results and save them for the synthesis.
list_period2$type="EU closure"
list_period1$type="EMP closure"
list_period=rbind.data.frame(list_period1,list_period2)
list_period$stage="Y"
save(list_period,file="loss_yellowtransitional.rdata")

freshwater waters

Data selection

Now we should carry out data selection; more specifically, we want to eliminate rows with too many missing data or too many zeros, and to check that there are no duplicates (though Cédric already did it).

# Keep, per EMU, the seasons with good monthly coverage of the migration
# wave (good_coverage_wave) and with more than 50 kg landed in the season.
yelloweel_freshwater <- subset(yelloweel, yelloweel$hty_code =="F")
kept_seasons <- lapply(unique(yelloweel_freshwater$emu_nameshort), function(s){
  sub_yellow <- subset(yelloweel_freshwater, yelloweel_freshwater$emu_nameshort==s)
  kept <- good_coverage_wave(sub_yellow)
  #we remove seasons in which we have less than 50 kg of landings
  if(!is.null(kept))
    kept <- kept[sapply(kept,function(k)
      sum(sub_yellow$das_value[sub_yellow$season==k],na.rm=TRUE)>50)]
  if (length(kept) == 0) kept <- NULL
  kept
})
## [1] "For  DE_Eide_F  a good season should cover months: 4 to 10"
## [1] "For  DE_Elbe_F  a good season should cover months: 4 to 11"
## [1] "For  DE_Schl_F  a good season should cover months: 4 to 11"
## [1] "For  DE_Warn_F  a good season should cover months: 3 to 10"
## [1] "For FR_Adou_F not possible to define a season"
## [1] "For  FR_Garo_F  a good season should cover months: 4 to 11"
## [1] "For  FR_Loir_F  a good season should cover months: 3 to 12"
## [1] "For FR_Rhin_F not possible to define a season"
## [1] "For  FR_Rhon_F  a good season should cover months: 3 to 11"
## [1] "For  FR_Sein_F  a good season should cover months: 3 to 11"
## [1] "For  GB_Angl_F  a good season should cover months: 4 to 11"
## [1] "For  GB_Dee_F  a good season should cover months: 6 to 10"
## [1] "For  GB_Humb_F  a good season should cover months: 12 to 10"
## [1] "For  GB_NorW_F  a good season should cover months: 5 to 11"
## [1] "For  GB_SouE_F  a good season should cover months: 12 to 11"
## [1] "For  GB_SouW_F  a good season should cover months: 11 to 10"
## [1] "For  GB_Tham_F  a good season should cover months: 5 to 11"
## [1] "For  GB_Wale_F  a good season should cover months: 11 to 10"
## [1] "For IE_East_F not possible to define a season"
## [1] "For  IE_West_F  a good season should cover months: 5 to 12"
## [1] "For  SE_Inla_F  a good season should cover months: 12 to 9"

Finally, here are the series kept given previous criterion.

# Name the list by EMU and display only the EMUs with at least one season kept.
names(kept_seasons) <- unique(yelloweel_freshwater$emu_nameshort)
kept_seasons[!sapply(kept_seasons,is.null)]
## $DE_Eide_F
## [1] 2009 2010
## 
## $DE_Elbe_F
##  [1] 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018
## 
## $DE_Schl_F
##  [1] 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018
## 
## $DE_Warn_F
## [1] 2010 2011 2012 2013 2014 2015 2016 2017 2018
## 
## $FR_Garo_F
## [1] 2000 2001 2002 2003 2007 2008 2009
## 
## $FR_Loir_F
## [1] 2000 2001 2002
## 
## $FR_Rhon_F
## [1] 2001 2002
## 
## $GB_Angl_F
## [1] 2014 2015 2016 2017 2018
## 
## $GB_Dee_F
## [1] 2014 2016 2017 2018
## 
## $GB_NorW_F
## [1] 2013 2014 2015 2016 2017
## 
## $GB_Tham_F
## [1] 2014 2015 2016 2017
## 
## $IE_West_F
## [1] 2006

Data preparation

We carry out the same procedure as for seasonality.

# Restrict to the seasons retained above, then reshape to one row per
# EMU x season with one column per month (m1..m12).
yelloweel_freshwater_subset <- subset(yelloweel_freshwater, 
                           mapply(function(season, series){
                             season %in% kept_seasons[[series]]
                           }, yelloweel_freshwater$season, yelloweel_freshwater$emu_nameshort))


yelloweel_freshwater_wide <- pivot_wider(data=yelloweel_freshwater_subset[, c("emu_nameshort",
                                                     "cou_code",
                                                     "season",
                                                     "das_month",
                                                     "das_value")],
                                names_from="das_month",
                                values_from="das_value")
names(yelloweel_freshwater_wide)[-(1:3)] <- paste("m",
                                       names(yelloweel_freshwater_wide)[-(1:3)],
                                       sep="")

###we count the number of zeros (or NAs) per line to remove lines without enough
###fishes
data_poor <- data.frame(yelloweel_freshwater_wide$emu_nameshort,
                        yelloweel_freshwater_wide$season,
                  zero=rowSums(yelloweel_freshwater_wide[, -(1:3)] == 0 |
                                 is.na(yelloweel_freshwater_wide[, -(1:3)])),
           tot=rowSums(yelloweel_freshwater_wide[, -(1:3)], na.rm=TRUE))
# keep only rows with fewer than 10 empty months out of 12
yelloweel_freshwater_wide <- yelloweel_freshwater_wide[data_poor$zero < 10, ]

# NOTE(review): rows with tot<50 are shown as data poor, but only the
# zero-count filter is applied here; the >50 kg filter was already applied
# during season selection above.
table_datapoor(data_poor %>% filter(zero > 9 | tot<50)) #we remove seasons with too few non-empty months
"data poor" situation
EMU season number of zero total catch

It leads to a dataset with 62 rows.

We now replace NA value per zero since we selected our dataseries with missing months corresponding to insignificant months / closed months, and we compute proportions per month for each year.

# Missing months correspond to closed / insignificant months in the kept
# series, so NAs are treated as true zero catches.
# Idiom fix: the 12-entry literal replace_na() list and the superseded
# mutate_at() are replaced with across(all_of(...)).
yelloweel_freshwater_wide <- yelloweel_freshwater_wide %>%
  mutate(across(all_of(paste("m", 1:12, sep="")), ~ replace_na(.x, 0)))
# Small offset so that no month has a strictly null proportion (presumably
# required by the model likelihood - TODO confirm), then convert each
# season's catches to proportions per month.
yelloweel_freshwater_wide[, -(1:3)] <- yelloweel_freshwater_wide[, -(1:3)] + 1e-3
total_catch_year <- rowSums(yelloweel_freshwater_wide[, paste("m", 1:12, sep="")])
yelloweel_freshwater_wide <- yelloweel_freshwater_wide %>%
  mutate(across(all_of(paste("m", 1:12, sep="")), ~ .x / total_catch_year))

The Commission asks us to compare the pattern before and after 2007, probably to see the effect of the Eel Regulation. It is therefore necessary to build a period index. However, since most countries implemented their EMPs only in 2009/2010, we split in 2010.

# Period index: 1 = seasons up to 2009 (before most EMPs were implemented),
# 2 = seasons from 2010 onwards.
yelloweel_freshwater_wide$period <- ifelse(yelloweel_freshwater_wide$season>2009,
                                  2,
                                  1)

kable(table(yelloweel_freshwater_wide$period,
       yelloweel_freshwater_wide$emu_nameshort),
      row.names=TRUE,caption="number of seasons per EMU and period")
number of seasons per EMU and period
DE_Eide_F DE_Elbe_F DE_Schl_F DE_Warn_F FR_Garo_F FR_Loir_F FR_Rhon_F GB_Angl_F GB_Dee_F GB_NorW_F GB_Tham_F IE_West_F
1 1 1 1 0 7 3 2 0 0 0 0 1
2 1 9 9 9 0 0 0 5 4 5 4 0

The situation is not well balanced: most EMUs which have data in period 1 don't have data in period 2, and conversely.

Running the model

# One group per EMU x period combination; y holds the monthly proportions
# (one row per season) passed to the JAGS model.
group <- as.integer(interaction(yelloweel_freshwater_wide$emu_nameshort,
                                            yelloweel_freshwater_wide$period,
                                            drop=TRUE))
nb_occ_group <- table(group)
y <-as.matrix(yelloweel_freshwater_wide[, paste("m", 1:12, sep="")])

Now, we make a loop to select the number of clusters based on a DIC criterion

# Fit the model for 2 to 7 clusters in parallel and collect, per number of
# clusters: the DIC, the median silhouette, and the mean number of clusters
# actually used across MCMC iterations.
# Fix: `adapted` was flagged TRUE right after run.jags, so an error in the
# post-fit statistics exited the retry loop with `stats` undefined; the
# flag is now set only once all statistics are computed.
cl <- makeCluster(3, 'FORK')
comparison <- parSapply(cl, 2:7,
       function(nbclus){
         mydata <- build_data(nbclus)
         adapted <- FALSE
         while (!adapted){
           tryCatch({
             runjags.options(adapt.incomplete="error")
             res <- run.jags("jags_model.txt", monitor= c("deviance",
                                                          "alpha_group",
                                                          "cluster"),
                        summarise=FALSE, adapt=40000, method="parallel",
                        sample=2000,burnin=100000,n.chains=1,
                        inits=generate_init(nbclus, mydata)[[1]],
                        data=mydata)
                        res_mat <- as.matrix(as.mcmc.list(res))
                        silhouette <- median(compute_silhouette(res_mat))
                        # number of distinct cluster labels used per iteration
                        nbused <- apply(res_mat, 1, function(iter){
                          length(table(iter[grep("cluster",
                                                 names(iter))]))
                        })
                        # DIC approximated as mean(deviance) + var(deviance)/2
                        dic <- mean(res_mat[,1])+0.5*var(res_mat[,1])
                        stats <- c(dic,silhouette,mean(nbused))
                        # flag success only after all statistics exist, so
                        # any failure above triggers a clean retry
                        adapted <- TRUE
                  }, error=function(e) {
                    print(paste("not adapted, restarting nbclus",nbclus))
                    }
                  )
         }
         stats
      })
stopCluster(cl)
best_yelloweel_freshwater_landings <- data.frame(nbclus=2:(ncol(comparison)+1),
                                              dic=comparison[1, ],
                                              silhouette=comparison[2, ],
                                              used=comparison[3,])
save(best_yelloweel_freshwater_landings, file="yelloweel_freshwater_landings_jags.rdata")
load("yelloweel_freshwater_landings_jags.rdata")
best_yelloweel_freshwater_landings
##   nbclus       dic silhouette used
## 1      2 -11120.11 0.18902292    2
## 2      3 -11033.32 0.01501762    3
## 3      4 -11138.97 0.08596161    3
## 4      5 -11159.30 0.09310377    3
## 5      6 -11155.76 0.11145854    3
## 6      7 -11152.48 0.04454594    4

Silhouette and DIC do not move much beyond 4 clusters, but only 3 clusters are actually used, therefore we keep 3.

# Final fit with the retained number of clusters (3), longer chains and
# thinning; retried until JAGS adaptation completes.
# Fix: "cluster" and "centroid" were listed twice in the monitor vector;
# the duplicates are removed (monitored nodes are unchanged).
nbclus <- 3
mydata <-build_data(3)
adapted <- FALSE
while (!adapted){
   tryCatch({
      runjags.options(adapt.incomplete="error")
      myfit_yelloweel_freshwater_landings <- run.jags("jags_model.txt",
                      monitor= c("cluster", "esp", "alpha_group",
                                 "centroid", "centroid_group",
                                 "distToClust", "duration_clus",
                                 "duration_group",
                                 "lambda","id_cluster"),
                      summarise=FALSE, adapt=20000, method="parallel",
                      sample=10000,burnin=200000,n.chains=1, thin=5,
                      inits=generate_init(nbclus, mydata)[[1]], data=mydata)
      adapted <- TRUE
    }, error=function(e) {
       print(paste("not adapted, restarting nbclus",nbclus))
    })
}


# Persist the final fit alongside the cluster-selection table.
save(myfit_yelloweel_freshwater_landings, best_yelloweel_freshwater_landings,
     file="yelloweel_freshwater_landings_jags.rdata")

Results

Once fitted, we can plot monthly pattern per cluster

# Reload the saved fit and rebuild the model data for the retained number
# of clusters (3), so this chunk can run independently of the fitting chunk.
load("yelloweel_freshwater_landings_jags.rdata")
nbclus <- 3
mydata <-build_data(3)
# Extract the posterior monthly pattern per cluster from a runjags fit.
#
# res:  runjags fit monitoring "esp[cluster,month]" nodes
# type: kind of pattern to extract; only "cluster" is implemented
#
# Returns a long data.frame with columns param, proportion, cluster and
# month (factor ordered m1..m12), one row per MCMC sample x cluster x month.
#
# Fixes: the original left `sub_mat` undefined for any type other than
# "cluster" (failing later with a confusing error) and coerced lists of
# characters with as.integer/as.character; we validate `type` up front and
# use vapply for type-stable extraction.
get_pattern_month <- function(res,type="cluster"){
  type <- match.arg(type, c("cluster"))  # fail early with a clear message
  res_mat <- as.matrix(as.mcmc.list(res, add.mutate=FALSE))
  sub_mat <- as.data.frame(res_mat[, grep("esp", colnames(res_mat)),
                                   drop=FALSE])
  sub_mat <- sub_mat %>% 
    pivot_longer(cols=seq_len(ncol(sub_mat)),
                 names_to="param",
                 values_to="proportion")
  # "esp[i,j]" -> c("esp", "i", "j") once split on punctuation
  tmp <- strsplit(as.character(sub_mat$param), "[[:punct:]]")
  sub_mat$cluster <- as.factor(
    as.integer(vapply(tmp, function(tt) tt[2], character(1))))
  sub_mat$month <- vapply(tmp,
                          function(tt) paste("m", tt[3], sep=""),
                          character(1))
  sub_mat$month <- factor(sub_mat$month, levels=paste("m", 1:12, sep=""))
  sub_mat
}

# Relabel clusters (JAGS labels 1,3,2 -> 1,2,3) and plot the monthly
# pattern per cluster; outliers hidden to keep the boxplots readable.
pat <-get_pattern_month(myfit_yelloweel_freshwater_landings)
pat$cluster <- factor(match(pat$cluster, c("1","3","2")),
                       levels=as.character(1:7))
ggplot(pat,aes(x=month,y=proportion))+
  geom_boxplot(aes(fill=cluster),outlier.shape=NA) +
  scale_fill_manual(values=cols)+facet_wrap(.~cluster, ncol=1) +
  theme_bw()

Clusters 1 and 3 are bivariate, with 1 peaking in spring and autumn and 3 peaking in summer and autumn. Cluster 2 is widespread from may to november.

We compute some statistics to characterize the clusters.

table_characteristics(myfit_yelloweel_freshwater_landings, 3)
characteristics of clusters
number of months to reach 80% of total
month of centroid
cluster median q2.5% q97.5% median q2.5% q97.5%
1 7 5 8 5.75 4.75 6.65
2 4 4 5 7.76 7.52 8.01
3 6 6 6 6.92 6.79 7.05

Duration indicates the minimum number of months that covers 80% of the wave (the first column is the median, the next two the 2.5% and 97.5% quantiles of the credibility interval). Centroid is the centroid of the migration wave (e.g. 11.5 would indicate a migration centred around mid-November); again, the first column is the median and the next two the 2.5% and 97.5% quantiles.

We can also look at the belonging of the different groups.

# Group labels "EMU.period", in the same order as the model's group index
# (used below to map cluster[] nodes back to EMU and period).
groups <- interaction(yelloweel_freshwater_wide$emu_nameshort,
                                            yelloweel_freshwater_wide$period,
                                            drop=TRUE)
group_name <- levels(groups)

# NOTE(review): despite its name, this function returns the cluster
# membership of each EMU x period group, not a monthly pattern (and it
# overwrites the get_pattern_month defined earlier). The `mydata` argument
# is unused; the function relies on the globals `group_name`, `groups`
# and `nbclus`.
get_pattern_month <- function(res,mydata){
  
  # group names are "EMU.period": split into series name and period
  tmp <- strsplit(as.character(group_name),
                  "\\.")
  ser <- as.character(lapply(tmp,function(tt){
    tt[1]
  }))
  period <- as.character(lapply(tmp,function(tt){
    tt[2]
  }))
  res_mat <- as.matrix(as.mcmc.list(res,add.mutate=FALSE))
  
  # for each group, tabulate the sampled cluster labels over the MCMC
  # iterations: the modal label is the assigned cluster, and the counts
  # give the membership frequency per cluster
  clus <- t(sapply(seq_len(length(unique(groups))), function(id){
    name_col <- paste("cluster[",id,"]",sep="")
    freq <- table(res_mat[,name_col])
    max_class <- names(freq)[order(freq,decreasing=TRUE)[1]]
    c(max_class,freq[as.character(1:nbclus)])
  }))
  storage.mode(clus) <- "numeric"
  classes <- as.data.frame(clus)
  names(classes) <- c("cluster", paste("clus",seq_len(nbclus),sep=""))
  cbind.data.frame(data.frame(ser=ser, period=period),
                   classes)
}

# Cluster membership per group, with clusters relabelled consistently with
# the pattern plot above (JAGS labels 1,3,2 -> 1,2,3).
myclassif <- get_pattern_month(myfit_yelloweel_freshwater_landings)
myclassif$cluster <- factor(match(myclassif$cluster, c("1","3","2")),
                       levels=as.character(1:7))

table_classif(myclassif)
EMU period Max cluster % clus 1 % clus 2 % clus 3
FR_Loir_F 1 1 91 0 9
DE_Eide_F 1 2 0 2 98
DE_Eide_F 2 2 0 1 99
DE_Elbe_F 1 2 0 0 100
DE_Elbe_F 2 2 0 0 100
DE_Schl_F 1 2 0 0 100
DE_Schl_F 2 2 0 0 100
DE_Warn_F 2 2 0 0 100
FR_Garo_F 1 2 0 0 100
FR_Rhon_F 1 2 0 0 100
GB_Angl_F 2 2 0 0 100
GB_Tham_F 2 2 0 0 100
IE_West_F 1 2 0 6 94
GB_Dee_F 2 3 0 100 0
GB_NorW_F 2 3 0 100 0

In fact, nearly all EMUs fall in cluster 2. Cluster 1 corresponds only to FR_Loir, and cluster 3 to two British EMUs. There is no obvious spatial pattern nor period effect.

# Map the cluster of each EMU for each period. The substr() strips the
# trailing habitat suffix (e.g. "_F") from the series name so it matches
# emu$name_short in the spatial layer.
myclassif_p1 <- subset(myclassif, myclassif$period == 1)
myclassif_p2 <- subset(myclassif, myclassif$period == 2)
emu$cluster1 <- factor(myclassif_p1$cluster[match(emu$name_short,
                                                  substr(myclassif_p1$ser,1,nchar(as.character(myclassif_p1$ser))-2))],
                       levels=1:7)
emu$cluster2 <- factor(myclassif_p2$cluster[match(emu$name_short,
                                                substr(myclassif_p2$ser,1,nchar(as.character(myclassif_p2$ser))-2))],
                       levels=1:7)
# period 1 (before 2010)
ggplot(data = cou) +  geom_sf(fill= "antiquewhite") +
        geom_sf(data=emu,aes(fill=cluster1)) + scale_fill_manual(values=cols)+
  theme_bw() +xlim(-20,30) + ylim(35,65) 

# period 2 (after 2010)
ggplot(data = cou) +  geom_sf(fill= "antiquewhite") +
        geom_sf(data=emu,aes(fill=cluster2)) + scale_fill_manual(values=cols)+
  theme_bw() +xlim(-20,30) + ylim(35,65)  

Exporting pattern per group

# Export the median monthly pattern per group for reuse elsewhere.
tmp <- as.matrix(as.mcmc.list(myfit_yelloweel_freshwater_landings))
name_col = colnames(tmp)

pattern_Yfresh_landings=do.call("rbind.data.frame",
                                lapply(seq_len(length(levels(groups))), function(g)
                                   median_pattern_group(g, group_name,tmp, "Y","landings", hty_code="F")))
save(pattern_Yfresh_landings,file="pattern_Yfresh_landings.rdata")

Similarity before and after 2010

#which groups have data in both periods
occ=table(unique(yelloweel_freshwater_wide[,c("emu_nameshort", "period")])[,1])
tocompare=names(occ)[which(occ>1)]

# Similarity = posterior distribution of sum_m min(pattern_p1[m],
# pattern_p2[m]), i.e. the overlap of the two monthly patterns (1 =
# identical patterns, 0 = disjoint).
# NOTE(review): grep(s, group_name) assumes exactly two matches per EMU.
simi=sapply(tocompare, function(s){
  g=grep(s,group_name)
  esp1=tmp[,grep(paste("alpha_group\\[",g[1],",",sep=""),name_col)]
  esp2=tmp[,grep(paste("alpha_group\\[",g[2],",",sep=""),name_col)]
  quantile(apply(cbind(esp1,esp2),
                 1,
                 function(x) sum(pmin(x[1:12],x[13:24]))),
           probs=c(0.025,.5,.975))
})

similarity=data.frame(emu=tocompare,t(simi))

table_similarity(similarity)
similarity
EMU q2.5% median q97.5%
DE_Eide_F 0.52 0.70 0.84
DE_Elbe_F 0.61 0.76 0.87
DE_Schl_F 0.60 0.74 0.86

Potential effect of EMP and EU closures

# Decompose group names "CC_Emu_H.p" into the period (last character),
# the EMU name ("CC_Emu") and the habitat code (the part before ".p").
ncar=nchar(group_name)
period=as.integer(substr(as.character(group_name),ncar,ncar))
blocks=strsplit(group_name,"_")
emus=sapply(blocks,function(x)paste(x[1],x[2],sep="_"))
hty_code=sapply(blocks,function(x) substr(x[3],1,nchar(x[3])-2))



#######EMP
# Period-1 (pre-2010) groups: candidates for estimating the potential
# landings loss due to EMP closures.
list_period1=data.frame(emu_nameshort=emus[period==1])
list_period1$group=group_name[period==1]
list_period1$id_g=match(list_period1$group,group_name)
list_period1$hty_code=hty_code[period==1]
  
#we check that we have landings data at least two years before the first EMP closures
list_period1$estimable=mapply(function(s,hty) {
  length(which(charac_EMP_closures$emu_nameshort==s 
               & grepl("Y",charac_EMP_closures$lfs_code) 
               & grepl(hty, charac_EMP_closures$hty_code)))>0},
  list_period1$emu_nameshort, list_period1$hty_code)

# ...and that the series starts at least two years before the first
# closure. NOTE(review): `group` is the global group index from the model
# section.
list_period1$estimable=list_period1$estimable &
(sapply(list_period1$id_g,function(e) min(yelloweel_freshwater_wide$season[group==e]))+2 <
mapply(function(e,hty) min(charac_EMP_closures$year[charac_EMP_closures$emu_nameshort==e &
                                                           grepl("Y",charac_EMP_closures$lfs_code) &
                                                    grepl(hty,charac_EMP_closures$hty_code)]),
       list_period1$emu_nameshort, list_period1$hty_code))

# placeholders for the quantiles of the estimated loss
list_period1$lossq2.5=NA
list_period1$lossq50=NA
list_period1$lossq97.5=NA

# For each estimable group, estimate the share of annual landings that
# falls inside EMP closure months: the strongest closure percentage per
# month times the posterior monthly pattern, summed over closed months.
# Fix: the last mapply argument used `list_period1$hty`, which only worked
# through data.frame partial matching of `hty_code`; spell it out.
res_closures=mapply(function(s,g,hty) {
  # strongest closure per month for this EMU / stage / habitat
  emu_closures <- EMP_closures %>%
    filter(emu_nameshort==s & grepl("Y",lfs_code) & grepl(hty, hty_code)) %>%
    group_by(emu_nameshort,month) %>%
    summarize(fishery_closure_percent=max(fishery_closure_percent))
  # posterior samples of the monthly pattern for the closed months
  myalpha=tmp[,paste("alpha_group[",g,",",emu_closures$month,"]",sep="")]
  if (nrow(emu_closures)>1){
    loss=colSums(apply(myalpha,1,function(x) x*emu_closures$fishery_closure_percent/100))
  } else {
    # a single closed month: myalpha is already a vector of samples
    loss=myalpha*emu_closures$fishery_closure_percent/100
  }
  quantile(loss,probs=c(0.025,.5,.975))
},as.character(list_period1$emu_nameshort[list_period1$estimable]),
list_period1$id_g[list_period1$estimable],
list_period1$hty_code[list_period1$estimable])

list_period1[list_period1$estimable, c("lossq2.5", "lossq50","lossq97.5")] =
  t(res_closures)

kable(list_period1[,c("emu_nameshort","lossq2.5","lossq50","lossq97.5")],
      col.names=c("emu","q2.5","median","q97.5"),
      caption="proportion of catch potentially lost because of EMP closure",
      digits=2)
proportion of catch potentially lost because of EMP closure
emu q2.5 median q97.5
DE_Eide NA NA NA
DE_Elbe NA NA NA
DE_Schl NA NA NA
FR_Garo 0.30 0.37 0.44
FR_Loir 0.40 0.48 0.56
FR_Rhon 0.28 0.36 0.46
IE_West NA NA NA
#######EU
# Period-2 (post-2010) groups: candidates for estimating the potential
# landings loss due to the EU closures.
list_period2=data.frame(emu_nameshort=emus[period==2])
list_period2$group=group_name[period==2]
list_period2$id_g=match(list_period2$group,group_name)
list_period2$hty_code=hty_code[period==2]
  
#we check that we have landings data at least two years before the first EU closures
list_period2$estimable=mapply(function(s,hty) {
  length(which(charac_EU_closures$emu_nameshort==s 
               & grepl("Y",charac_EU_closures$lfs_code) 
               & grepl(hty, charac_EU_closures$hty_code)))>0},
  list_period2$emu_nameshort, list_period2$hty_code)

# ...and that the series starts at least two years before the first closure
list_period2$estimable=list_period2$estimable &
(sapply(list_period2$id_g,function(e) min(yelloweel_freshwater_wide$season[group==e]))+2 <
mapply(function(e,hty) min(charac_EU_closures$year[charac_EU_closures$emu_nameshort==e &
                                                           grepl("Y",charac_EU_closures$lfs_code) &
                                                    grepl(hty,charac_EU_closures$hty_code)]),
       list_period2$emu_nameshort, list_period2$hty_code))

# placeholders for the quantiles of the estimated loss
list_period2$lossq2.5=NA
list_period2$lossq50=NA
list_period2$lossq97.5=NA

# Same computation as for the EMP closures above, applied to the EU closure
# table: per-month maximum closure percentage, weighted by the posterior
# monthly proportions and summed, with quantiles over MCMC draws.
res_closures <- mapply(
  function(emu_name, grp, habitat) {
    closures <- EU_closures %>%
      filter(emu_nameshort == emu_name &
               grepl("Y", lfs_code) &
               grepl(habitat, hty_code)) %>%
      group_by(emu_nameshort, month) %>%
      summarize(fishery_closure_percent = max(fishery_closure_percent))
    post_alpha <- tmp[, paste0("alpha_group[", grp, ",", closures$month, "]")]
    closure_frac <- closures$fishery_closure_percent / 100
    if (nrow(closures) > 1) {
      loss <- colSums(apply(post_alpha, 1, function(a) a * closure_frac))
    } else {
      loss <- post_alpha * closure_frac
    }
    quantile(loss, probs = c(0.025, 0.5, 0.975))
  },
  as.character(list_period2$emu_nameshort[list_period2$estimable]),
  list_period2$id_g[list_period2$estimable],
  list_period2$hty_code[list_period2$estimable]
)

list_period2[list_period2$estimable, c("lossq2.5", "lossq50", "lossq97.5")] =
  t(res_closures)

# Display the loss quantiles per EMU (EU closures); NA rows are the series
# for which the loss was not estimable.
kable(list_period2[,c("emu_nameshort","lossq2.5","lossq50","lossq97.5")],
      col.names=c("emu","q2.5","median","q97.5"),
      caption="proportion of catch potentially lost because of EU closure",
      digits=2)
proportion of catch potentially lost because of EU closure
emu q2.5 median q97.5
DE_Eide NA NA NA
DE_Elbe NA NA NA
DE_Schl NA NA NA
DE_Warn NA NA NA
GB_Angl 0.01 0.03 0.05
GB_Dee 0.01 0.02 0.04
GB_NorW 0.01 0.01 0.03
GB_Tham 0.00 0.01 0.02
# Tag each table with its closure type, stack them into a single table for
# the yellow eel freshwater stage, and persist the result.
list_period2$type <- "EU closure"
list_period1$type <- "EMP closure"
list_period <- rbind.data.frame(list_period1, list_period2)
list_period$stage <- "Y"
save(list_period, file = "loss_yellowfresh.rdata")

All habitats

Data selection

Now we should carry out data selection; more specifically, we want to eliminate rows with too many missing data or too many zeros, and to check that there are no duplicates (though Cedric already did it)

# For every EMU x habitat series, keep only the seasons with good monthly
# coverage (good_coverage_wave) and more than 50 kg of landings.
yelloweel_allhab <- yelloweel
kept_seasons <- lapply(unique(yelloweel_allhab$emu_nameshort), function(series) {
  series_data <- subset(yelloweel_allhab, yelloweel_allhab$emu_nameshort == series)
  kept <- good_coverage_wave(series_data)
  if (!is.null(kept)) {
    # drop seasons with 50 kg of landings or less
    enough_landings <- sapply(kept, function(season)
      sum(series_data$das_value[series_data$season == season], na.rm = TRUE) > 50)
    kept <- kept[enough_landings]
  }
  if (length(kept) == 0) NULL else kept
})
## [1] "For  DE_Eide_C  a good season should cover months: 5 to 11"
## [1] "For  DE_Eide_F  a good season should cover months: 4 to 10"
## [1] "For  DE_Eide_T  a good season should cover months: 4 to 11"
## [1] "For  DE_Elbe_F  a good season should cover months: 4 to 11"
## [1] "For  DE_Elbe_T  a good season should cover months: 4 to 11"
## [1] "For  DE_Schl_C  a good season should cover months: 5 to 11"
## [1] "For  DE_Schl_F  a good season should cover months: 4 to 11"
## [1] "For  DE_Warn_F  a good season should cover months: 3 to 10"
## [1] "For  DK_total_MO  a good season should cover months: 4 to 11"
## [1] "For  ES_Murc_C  a good season should cover months: 11 to 3"
## [1] "For FR_Adou_F not possible to define a season"
## [1] "For  FR_Adou_T  a good season should cover months: 4 to 8"
## [1] "For  FR_Arto_T  a good season should cover months: 6 to 11"
## [1] "For  FR_Bret_T  a good season should cover months: 3 to 9"
## [1] "For  FR_Cors_T  a good season should cover months: 3 to 11"
## [1] "For  FR_Garo_F  a good season should cover months: 4 to 11"
## [1] "For  FR_Garo_T  a good season should cover months: 4 to 11"
## [1] "For  FR_Loir_F  a good season should cover months: 3 to 12"
## [1] "For  FR_Loir_T  a good season should cover months: 5 to 11"
## [1] "For FR_Rhin_F not possible to define a season"
## [1] "For  FR_Rhon_F  a good season should cover months: 3 to 11"
## [1] "For  FR_Sein_F  a good season should cover months: 3 to 11"
## [1] "For  FR_Sein_T  a good season should cover months: 4 to 12"
## [1] "For  GB_Angl_C  a good season should cover months: 5 to 11"
## [1] "For  GB_Angl_F  a good season should cover months: 4 to 11"
## [1] "For  GB_Dee_F  a good season should cover months: 6 to 10"
## [1] "For GB_Dee_T not possible to define a season"
## [1] "For  GB_Humb_F  a good season should cover months: 12 to 10"
## [1] "For  GB_NorW_C  a good season should cover months: 5 to 8"
## [1] "For  GB_NorW_F  a good season should cover months: 5 to 11"
## [1] "For  GB_SouE_C  a good season should cover months: 4 to 10"
## [1] "For  GB_SouE_F  a good season should cover months: 12 to 11"
## [1] "For  GB_SouW_C  a good season should cover months: 4 to 11"
## [1] "For  GB_SouW_F  a good season should cover months: 11 to 10"
## [1] "For  GB_Tham_F  a good season should cover months: 5 to 11"
## [1] "For  GB_Tham_C  a good season should cover months: 5 to 4"
## [1] "For  GB_Wale_F  a good season should cover months: 11 to 10"
## [1] "For IE_East_F not possible to define a season"
## [1] "For  IE_West_F  a good season should cover months: 5 to 12"
## [1] "For  NO_total_T  a good season should cover months: 5 to 11"
## [1] "For  SE_East_C  a good season should cover months: 4 to 11"
## [1] "For  SE_Inla_F  a good season should cover months: 12 to 9"
## [1] "For  SE_West_C  a good season should cover months: 5 to 11"

Finally, here are the series kept given previous criterion.

# Name the list by series and show only the series with at least one kept season
names(kept_seasons) <- unique(yelloweel_allhab$emu_nameshort)
kept_seasons[!sapply(kept_seasons,is.null)]
## $DE_Eide_C
##  [1] 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018
## 
## $DE_Eide_F
## [1] 2009 2010
## 
## $DE_Eide_T
##  [1] 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018
## 
## $DE_Elbe_F
##  [1] 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018
## 
## $DE_Elbe_T
##  [1] 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018
## 
## $DE_Schl_C
##  [1] 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018
## 
## $DE_Schl_F
##  [1] 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018
## 
## $DE_Warn_F
## [1] 2010 2011 2012 2013 2014 2015 2016 2017 2018
## 
## $DK_total_MO
##  [1] 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013
## [15] 2014 2015 2016 2017 2018 2019
## 
## $ES_Murc_C
## [1] 2014 2016
## 
## $FR_Adou_T
## [1] 2009 2011 2013 2014 2015 2016 2018
## 
## $FR_Arto_T
## [1] 2009
## 
## $FR_Bret_T
##  [1] 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018
## 
## $FR_Cors_T
## [1] 2010 2011 2012 2013 2014 2015 2016 2017 2018
## 
## $FR_Garo_F
## [1] 2000 2001 2002 2003 2007 2008 2009
## 
## $FR_Garo_T
##  [1] 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018
## 
## $FR_Loir_F
## [1] 2000 2001 2002
## 
## $FR_Loir_T
##  [1] 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018
## 
## $FR_Rhon_F
## [1] 2001 2002
## 
## $FR_Sein_T
## [1] 2009 2010 2015
## 
## $GB_Angl_C
## [1] 2014 2015 2016 2017 2018
## 
## $GB_Angl_F
## [1] 2014 2015 2016 2017 2018
## 
## $GB_Dee_F
## [1] 2014 2016 2017 2018
## 
## $GB_NorW_F
## [1] 2013 2014 2015 2016 2017
## 
## $GB_SouE_C
## [1] 2013 2014 2015 2016 2017
## 
## $GB_SouW_C
## [1] 2013 2014 2015 2016 2017
## 
## $GB_Tham_F
## [1] 2014 2015 2016 2017
## 
## $IE_West_F
## [1] 2006
## 
## $NO_total_T
## [1] 2001
## 
## $SE_East_C
##  [1] 2000 2001 2002 2003 2004 2005 2006 2007 2008 2012 2013
## 
## $SE_West_C
## [1] 2000 2001 2002 2003 2004 2005 2006 2007 2008

Data preparation

We carry out the same procedure as for seasonality.

# Keep only the (series, season) pairs retained by the coverage/landings check
yelloweel_allhab_subset <- subset(yelloweel_allhab, 
                           mapply(function(season, series){
                             season %in% kept_seasons[[series]]
                           }, yelloweel_allhab$season, yelloweel_allhab$emu_nameshort))


# Reshape to wide format: one row per series x season, one column per month
yelloweel_allhab_wide <- pivot_wider(data=yelloweel_allhab_subset[, c("emu_nameshort",
                                                     "cou_code",
                                                     "season",
                                                     "das_month",
                                                     "das_value")],
                                names_from="das_month",
                                values_from="das_value")
names(yelloweel_allhab_wide)[-(1:3)] <- paste("m",
                                       names(yelloweel_allhab_wide)[-(1:3)],
                                       sep="")

###we count the number of zeros per lines to remove lines without enough
###fishes
data_poor <- data.frame(yelloweel_allhab_wide$emu_nameshort,
                        yelloweel_allhab_wide$season,
                  zero=rowSums(yelloweel_allhab_wide[, -(1:3)] == 0 |
                                 is.na(yelloweel_allhab_wide[, -(1:3)])),
           tot=rowSums(yelloweel_allhab_wide[, -(1:3)], na.rm=TRUE))
yelloweel_allhab_wide <- yelloweel_allhab_wide[data_poor$zero < 10 & data_poor$tot>50, ]
table_datapoor(data_poor %>% filter(zero > 9 | tot<50)) #we remove seasons with 2 or fewer non-zero months, or 50 kg of landings or less
"data poor" situation
EMU season number of zero total catch
ES_Murc_C 2014 10 2623
FR_Adou_T 2013 11 294
FR_Arto_T 2009 11 330
FR_Sein_T 2015 10 475
GB_SouE_C 2013 11 64

It leads to a dataset with 205 rows.

We now replace NA values by zero, since we selected our data series so that missing months correspond to insignificant months / closed months, and we compute proportions per month for each year.

# Replace remaining NAs by zero (the kept series were selected so that missing
# months correspond to insignificant/closed months), add a small constant so
# no month is exactly zero, then convert monthly catches into within-season
# proportions.
month_cols <- paste("m", 1:12, sep = "")
yelloweel_allhab_wide <- yelloweel_allhab_wide %>%
  replace_na(replace = as.list(setNames(rep(0, 12), month_cols)))
yelloweel_allhab_wide[, -(1:3)] <- yelloweel_allhab_wide[, -(1:3)] + 1e-3
total_catch_year <- rowSums(yelloweel_allhab_wide[, month_cols])
yelloweel_allhab_wide <- yelloweel_allhab_wide %>%
  mutate_at(.vars = month_cols, function(x) x / total_catch_year)

The Commission asks us to compare the pattern before and after 2007, probably to see the effect of the Eel Regulation. It is therefore necessary to build a period index. However, since most countries implemented their EMPs only in 2009/2010, we split in 2010.

# Period index: 1 = up to season 2009, 2 = from season 2010 onwards
# (most Eel Management Plans were implemented in 2009/2010)
yelloweel_allhab_wide$period <- ifelse(yelloweel_allhab_wide$season>2009,
                                  2,
                                  1)

# number of kept seasons per EMU and period, to assess balance across periods
kable(table(yelloweel_allhab_wide$period,
       yelloweel_allhab_wide$emu_nameshort),
      row.names=TRUE,caption="number of seasons per EMU and period")
number of seasons per EMU and period
DE_Eide_C DE_Eide_F DE_Eide_T DE_Elbe_F DE_Elbe_T DE_Schl_C DE_Schl_F DE_Warn_F DK_total_MO ES_Murc_C FR_Adou_T FR_Bret_T FR_Cors_T FR_Garo_F FR_Garo_T FR_Loir_F FR_Loir_T FR_Rhon_F FR_Sein_T GB_Angl_C GB_Angl_F GB_Dee_F GB_NorW_F GB_SouE_C GB_SouW_C GB_Tham_F IE_West_F NO_total_T SE_East_C SE_West_C
1 1 1 1 1 1 1 1 0 10 0 1 1 0 7 1 3 1 2 1 0 0 0 0 0 0 0 1 1 9 9
2 9 1 9 9 9 9 9 9 10 1 5 9 9 0 9 0 9 0 1 5 5 4 5 4 5 4 0 0 2 0

The situation is not well balanced. Most EMUs which have data in period 1 don't have data in period 2, and conversely.

Running the model

# One group id per EMU x period combination actually present in the data.
# NOTE(review): this redefines the `group` vector used by the freshwater
# analysis earlier in the file.
group <- as.integer(interaction(yelloweel_allhab_wide$emu_nameshort,
                                            yelloweel_allhab_wide$period,
                                            drop=TRUE))
nb_occ_group <- table(group)
# observation matrix: monthly proportions, one row per series x season
y <-as.matrix(yelloweel_allhab_wide[, paste("m", 1:12, sep="")])

Now, we make a loop to select the number of clusters based on a DIC criterion

# Fit the clustering model for 2 to 7 clusters (3 forked workers) and collect,
# for each candidate number of clusters: a DIC approximation, the median
# silhouette, and the average number of clusters actually used per iteration.
cl <- makeCluster(3, 'FORK')
comparison <- parSapply(cl,2:7,
       function(nbclus){
         mydata <- build_data(nbclus)
         adapted <- FALSE
         # JAGS adaptation can fail; retry until the adaptive phase completes
         while (!adapted){
           tryCatch({
             runjags.options(adapt.incomplete="error")
             res <- run.jags("jags_model.txt", monitor= c("deviance",
                                                          "alpha_group",
                                                          "cluster"),
                        summarise=FALSE, adapt=40000, method="parallel",
                        sample=2000,burnin=100000,n.chains=1,
                        inits=generate_init(nbclus, mydata)[[1]],
                        data=mydata)
                        adapted <- TRUE
                        res_mat <- as.matrix(as.mcmc.list(res))
                        silhouette <- median(compute_silhouette(res_mat))
                        # number of distinct clusters used at each iteration
                        nbused <- apply(res_mat, 1, function(iter){
                          length(table(iter[grep("cluster",
                                                 names(iter))]))
                        })
                        # DIC approximation: mean deviance + 0.5 * var(deviance)
                        dic <- mean(res_mat[,1])+0.5*var(res_mat[,1])
                        stats <- c(dic,silhouette,mean(nbused))
                  }, error=function(e) {
                    print(paste("not adapted, restarting nbclus",nbclus))
                    }
                  )
         }
         stats
      })
stopCluster(cl)
best_yelloweel_allhab_landings <- data.frame(nbclus=2:(ncol(comparison)+1),
                                              dic=comparison[1, ],
                                              silhouette=comparison[2, ],
                                              used=comparison[3, ])
# NOTE(review): this file is overwritten below once the final model is fitted
save(best_yelloweel_allhab_landings, file="yelloweel_allhab_landings_jags.rdata")
load("yelloweel_allhab_landings_jags.rdata")
best_yelloweel_allhab_landings
##   nbclus       dic silhouette used
## 1      2 -39569.75 0.15070626    2
## 2      3 -39533.64 0.35762186    3
## 3      4 -40311.51 0.10330834    4
## 4      5 -40640.97 0.09872620    5
## 5      6 -40877.49 0.12790819    6
## 6      7 -41046.74 0.05064337    7

The number of clusters used keeps increasing; there is a good silhouette and DIC at 6.

# Final run with the retained number of clusters (6), monitoring everything
# needed for the result tables and plots below. Retries until the JAGS
# adaptive phase completes.
nbclus <- 6
mydata <-build_data(6)
adapted <- FALSE
while (!adapted){
   tryCatch({
      runjags.options(adapt.incomplete="error")
      # FIX: "cluster" and "centroid" were each listed twice in the monitor
      # vector; the duplicates are removed (monitored set is unchanged)
      myfit_yelloweel_allhab_landings <- run.jags("jags_model.txt",
                      monitor= c("cluster", "esp", "alpha_group",
                                 "centroid", "centroid_group",
                                 "distToClust", "duration_clus",
                                 "duration_group",
                                 "lambda", "id_cluster"),
                      summarise=FALSE, adapt=20000, method="parallel",
                      sample=10000,burnin=200000,n.chains=1, thin=5,
                      inits=generate_init(nbclus, mydata)[[1]], data=mydata)
      adapted <- TRUE
    }, error=function(e) {
       print(paste("not adapted, restarting nbclus",nbclus))
    })
}


# persist both the model-selection table and the fitted model
save(myfit_yelloweel_allhab_landings, best_yelloweel_allhab_landings,
     file="yelloweel_allhab_landings_jags.rdata")

Results

Once fitted, we can plot monthly pattern per cluster

load("yelloweel_allhab_landings_jags.rdata")
nbclus <- 6
mydata <-build_data(6)

# Extract the posterior monthly proportions ("esp[cluster,month]" nodes) from
# a runjags fit and reshape them into a long data.frame with one row per
# iteration x cluster x month.
#
# res:  a runjags fit object
# type: only "cluster" is supported
get_pattern_month <- function(res,type="cluster"){
  res_mat <- as.matrix(as.mcmc.list(res, add.mutate=FALSE))
  if (type=="cluster"){
    sub_mat <- as.data.frame(res_mat[,grep("esp",colnames(res_mat))])
  } else {
    # FIX: the original silently left sub_mat undefined for any other type,
    # producing a cryptic "object not found" error downstream
    stop("unsupported type: ", type)
  }
  sub_mat <- sub_mat %>% 
    pivot_longer(cols=1:ncol(sub_mat),
                 names_to="param",
                 values_to="proportion")
  # parameter names look like "esp[c,m]"; splitting on punctuation yields
  # c("esp", cluster, month). strsplit is vectorized: no lapply wrapper needed.
  tokens <- strsplit(as.character(sub_mat$param), "[[:punct:]]")
  sub_mat$cluster <- as.factor(vapply(tokens,
                                      function(tt) as.integer(tt[2]),
                                      integer(1)))
  sub_mat$month <- vapply(tokens,
                          function(tt) paste("m", tt[3], sep=""),
                          character(1))
  sub_mat$month <- factor(sub_mat$month, levels=paste("m", 1:12, sep=""))
  sub_mat
}

pat <-get_pattern_month(myfit_yelloweel_allhab_landings)
# Relabel the raw cluster ids for presentation (raw 5 -> 3, raw 3 -> 4,
# raw 6 -> 5, raw 4 -> 6) -- presumably to order clusters by peak timing;
# the same mapping is applied to the classification table below
pat$cluster <- factor(match(pat$cluster, c("1","2","5","3","6","4")),
                       levels=as.character(1:7))

# one boxplot panel of monthly proportions per (relabelled) cluster
ggplot(pat,aes(x=month,y=proportion))+
  geom_boxplot(aes(fill=cluster),outlier.shape=NA) +
  scale_fill_manual(values=cols)+facet_wrap(.~cluster, ncol=1) +
  theme_bw()

Cluster 1 peaks in winter, 2 in spring, 3 in spring/summer, 5 is widespread from April to November and 6 peaks in autumn (after a small peak in spring).

We compute some statistics to characterize the clusters.

# Summary statistics (duration and centroid of the migration wave) per cluster
table_characteristics(myfit_yelloweel_allhab_landings, 6)
characteristics of clusters
number of months to reach 80% of total
month of centroid
cluster median q2.5% q97.5% median q2.5% q97.5%
1 3 2 3 2.44 2.01 2.83
2 3 2 3 5.45 5.26 5.64
3 3 2 3 7.51 7.39 7.64
4 5 4 5 11.32 11.03 11.64
5 5 4 5 6.35 6.17 6.51
6 6 6 6 7.86 7.78 7.93

Duration indicates the minimum number of months that covers 80% of the wave (the first column is the median, and the next two columns the 2.5% and 97.5% quantiles of the credibility interval). Centroid is the centroid of the migration wave (e.g. 11.5 would indicate a migration centred around mid-November); again, the first column is the median and the next two the 2.5% and 97.5% quantiles.

We can also look at the belonging of the different groups.

# Classify each EMU x period group: for every group, the modal posterior
# cluster assignment plus the per-cluster iteration counts.
# NOTE(review): this redefines get_pattern_month() from above with different
# semantics; the `mydata` argument is unused and `nbclus` is read from the
# global environment -- consider renaming/refactoring.
get_pattern_month <- function(res,mydata){
  
  # rebuild the "<emu>.<period>" group labels used when fitting the model
  groups <- interaction(yelloweel_allhab_wide$emu_nameshort,
                                            yelloweel_allhab_wide$period,
                                            drop=TRUE)
  group_name <- levels(groups)
  tmp <- strsplit(as.character(group_name),
                  "\\.")
  ser <- as.character(lapply(tmp,function(tt){
    tt[1]
  }))
  period <- as.character(lapply(tmp,function(tt){
    tt[2]
  }))
  res_mat <- as.matrix(as.mcmc.list(res,add.mutate=FALSE))
  
  # for each group: most frequent cluster, then iteration counts per cluster
  # (freq[as.character(1:nbclus)] yields NA for clusters never used)
  clus <- t(sapply(seq_len(length(unique(groups))), function(id){
    name_col <- paste("cluster[",id,"]",sep="")
    freq <- table(res_mat[,name_col])
    max_class <- names(freq)[order(freq,decreasing=TRUE)[1]]
    c(max_class,freq[as.character(1:nbclus)])
  }))
  storage.mode(clus) <- "numeric"
  classes <- as.data.frame(clus)
  names(classes) <- c("cluster", paste("clus",seq_len(nbclus),sep=""))
  cbind.data.frame(data.frame(ser=ser, period=period),
                   classes)
}

myclassif <- get_pattern_month(myfit_yelloweel_allhab_landings)
# apply the same cluster relabelling (raw 5 -> 3, 3 -> 4, 6 -> 5, 4 -> 6)
# used for the monthly-pattern boxplots
myclassif$cluster <- factor(match(myclassif$cluster, c("1","2","5","3","6","4")),
                       levels=as.character(1:7))

table_classif(myclassif)
EMU period Max cluster % clus 1 % clus 2 % clus 3 % clus 4 % clus 5 % clus 6
ES_Murc_C 2 1 100 0 0 0 0 0
FR_Adou_T 1 2 0 100 0 0 0 0
FR_Adou_T 2 2 0 100 0 0 0 0
FR_Sein_T 1 2 0 100 0 0 0 0
DE_Eide_F 1 3 0 0 0 0 91 9
DE_Eide_F 2 3 0 0 0 0 76 24
DE_Elbe_F 1 3 0 0 0 0 100 0
FR_Bret_T 1 3 0 0 43 0 51 7
FR_Bret_T 2 3 0 0 0 0 100 0
FR_Garo_T 2 3 0 0 0 0 100 0
GB_SouW_C 2 3 0 0 0 0 82 18
FR_Sein_T 2 4 0 0 100 0 0 0
GB_Dee_F 2 4 0 0 100 0 0 0
GB_SouE_C 2 4 0 0 100 0 0 0
DE_Eide_C 1 5 0 0 0 0 0 100
DE_Eide_C 2 5 0 0 0 0 0 100
DE_Eide_T 1 5 0 0 0 0 0 100
DE_Eide_T 2 5 0 0 0 0 0 100
DE_Elbe_F 2 5 0 0 0 0 3 97
DE_Elbe_T 1 5 0 0 1 0 0 99
DE_Elbe_T 2 5 0 0 0 0 0 100
DE_Schl_C 1 5 0 0 0 0 2 98
DE_Schl_C 2 5 0 0 0 0 0 100
DE_Schl_F 1 5 0 0 0 0 20 80
DE_Schl_F 2 5 0 0 0 0 0 100
DE_Warn_F 2 5 0 0 0 0 1 99
DK_total_MO 1 5 0 0 0 0 0 100
DK_total_MO 2 5 0 0 0 0 0 100
FR_Garo_F 1 5 0 0 0 0 0 100
FR_Garo_T 1 5 0 0 0 1 1 98
FR_Loir_F 1 5 0 0 0 0 1 99
FR_Loir_T 1 5 0 0 0 1 8 91
FR_Loir_T 2 5 0 0 0 0 0 100
FR_Rhon_F 1 5 0 0 0 0 2 98
GB_Angl_C 2 5 0 0 0 0 0 100
GB_Angl_F 2 5 0 0 0 0 0 100
GB_NorW_F 2 5 0 0 0 0 0 100
GB_Tham_F 2 5 0 0 0 0 0 100
IE_West_F 1 5 0 0 0 0 6 94
NO_total_T 1 5 0 0 0 0 0 100
SE_East_C 1 5 0 0 0 0 0 100
SE_East_C 2 5 0 0 0 0 0 100
SE_West_C 1 5 0 0 0 0 0 100
FR_Cors_T 2 6 0 0 0 100 0 0

Cluster 1 corresponds only to ES_Murc and cluster 6 to FR_Cors. Cluster 2 corresponds to French EMUs in transitional waters. Clusters 3–5 are diverse: 5 accounts for French and German EMUs (T and F) and 6 for a large number of EMUs.

# Map the dominant cluster of each EMU on a map of Europe, one pair of maps
# (period 1 / period 2) per habitat type. Series names look like
# "<EMU>_<hty>"; the habitat suffix must be stripped to join on
# emu$name_short.
# BUG FIX: the original used substr(ser, 1, nchar(ser) - 2), which removes
# exactly 2 characters; for the 2-letter habitat "MO" this left a trailing
# "_" (e.g. "DK_total_") that never matched any EMU name, so MO maps were
# all-NA. We now strip the literal "_<hty>" suffix instead.
myplots <-lapply(c("MO","C","T", "F"),function(hty){
  strip_hty <- function(ser) sub(paste("_", hty, "$", sep=""), "",
                                 as.character(ser))
  myclassif_p1 <- subset(myclassif, myclassif$period == 1 &
                           endsWith(as.character(myclassif$ser),
                                    hty))
  myclassif_p2 <- subset(myclassif, myclassif$period == 2 &
                           endsWith(as.character(myclassif$ser),
                                    hty))
  emu$cluster1 <- factor(myclassif_p1$cluster[match(emu$name_short,
                                                    strip_hty(myclassif_p1$ser))],
                       levels=1:7)
  emu$cluster2 <- factor(myclassif_p2$cluster[match(emu$name_short,
                                                    strip_hty(myclassif_p2$ser))],
                       levels=1:7)
  p1 <- ggplot(data = cou) +  geom_sf(fill= "antiquewhite") +
          geom_sf(data=emu,aes(fill=cluster1)) + scale_fill_manual(values=cols)+
      theme_bw() +xlim(-20,30) + ylim(35,65) +
    ggtitle(paste("period 1",hty))
  p2 <- ggplot(data = cou) +  geom_sf(fill= "antiquewhite") +
          geom_sf(data=emu,aes(fill=cluster2)) + scale_fill_manual(values=cols)+
    theme_bw() +xlim(-20,30) + ylim(35,65)  +
    ggtitle(paste("period 2",hty))
  return(list(p1,p2))
})
# flatten: one list of 8 plots (MO, C, T, F) x (period 1, period 2)
myplots <- do.call(c, myplots)
print(myplots[[1]][[1]])
## Simple feature collection with 54 features and 1 field
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -31.26575 ymin: 32.39748 xmax: 69.07032 ymax: 81.85737
## epsg (SRID):    4326
## proj4string:    +proj=longlat +datum=WGS84 +no_defs
## First 10 features:
##                  NAME                       geometry
## 1             Albania MULTIPOLYGON (((19.50115 40...
## 2             Andorra MULTIPOLYGON (((1.439922 42...
## 3             Austria MULTIPOLYGON (((16 48.77775...
## 4             Belgium MULTIPOLYGON (((5 49.79374,...
## 5  Bosnia Herzegovina MULTIPOLYGON (((19.22947 43...
## 6             Croatia MULTIPOLYGON (((14.30038 44...
## 7      Czech Republic MULTIPOLYGON (((14.82523 50...
## 8             Denmark MULTIPOLYGON (((11.99978 54...
## 9             Estonia MULTIPOLYGON (((23.97511 58...
## 10            Finland MULTIPOLYGON (((22.0731 60....
print(myplots[[1]][[2]])
## [[1]]
## mapping:  
## geom_sf: na.rm = FALSE, legend = polygon
## stat_sf: na.rm = FALSE
## position_identity 
## 
## [[2]]
## mapping: fill = ~cluster1 
## geom_sf: na.rm = FALSE, legend = polygon
## stat_sf: na.rm = FALSE
## position_identity
print(myplots[[2]][[1]])
## Simple feature collection with 54 features and 1 field
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -31.26575 ymin: 32.39748 xmax: 69.07032 ymax: 81.85737
## epsg (SRID):    4326
## proj4string:    +proj=longlat +datum=WGS84 +no_defs
## First 10 features:
##                  NAME                       geometry
## 1             Albania MULTIPOLYGON (((19.50115 40...
## 2             Andorra MULTIPOLYGON (((1.439922 42...
## 3             Austria MULTIPOLYGON (((16 48.77775...
## 4             Belgium MULTIPOLYGON (((5 49.79374,...
## 5  Bosnia Herzegovina MULTIPOLYGON (((19.22947 43...
## 6             Croatia MULTIPOLYGON (((14.30038 44...
## 7      Czech Republic MULTIPOLYGON (((14.82523 50...
## 8             Denmark MULTIPOLYGON (((11.99978 54...
## 9             Estonia MULTIPOLYGON (((23.97511 58...
## 10            Finland MULTIPOLYGON (((22.0731 60....
print(myplots[[2]][[2]])
## [[1]]
## mapping:  
## geom_sf: na.rm = FALSE, legend = polygon
## stat_sf: na.rm = FALSE
## position_identity 
## 
## [[2]]
## mapping: fill = ~cluster2 
## geom_sf: na.rm = FALSE, legend = polygon
## stat_sf: na.rm = FALSE
## position_identity
print(myplots[[3]][[1]])
## Simple feature collection with 54 features and 1 field
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -31.26575 ymin: 32.39748 xmax: 69.07032 ymax: 81.85737
## epsg (SRID):    4326
## proj4string:    +proj=longlat +datum=WGS84 +no_defs
## First 10 features:
##                  NAME                       geometry
## 1             Albania MULTIPOLYGON (((19.50115 40...
## 2             Andorra MULTIPOLYGON (((1.439922 42...
## 3             Austria MULTIPOLYGON (((16 48.77775...
## 4             Belgium MULTIPOLYGON (((5 49.79374,...
## 5  Bosnia Herzegovina MULTIPOLYGON (((19.22947 43...
## 6             Croatia MULTIPOLYGON (((14.30038 44...
## 7      Czech Republic MULTIPOLYGON (((14.82523 50...
## 8             Denmark MULTIPOLYGON (((11.99978 54...
## 9             Estonia MULTIPOLYGON (((23.97511 58...
## 10            Finland MULTIPOLYGON (((22.0731 60....
print(myplots[[3]][[2]])
## [[1]]
## mapping:  
## geom_sf: na.rm = FALSE, legend = polygon
## stat_sf: na.rm = FALSE
## position_identity 
## 
## [[2]]
## mapping: fill = ~cluster1 
## geom_sf: na.rm = FALSE, legend = polygon
## stat_sf: na.rm = FALSE
## position_identity
print(myplots[[4]][[1]])
## Simple feature collection with 54 features and 1 field
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -31.26575 ymin: 32.39748 xmax: 69.07032 ymax: 81.85737
## epsg (SRID):    4326
## proj4string:    +proj=longlat +datum=WGS84 +no_defs
## First 10 features:
##                  NAME                       geometry
## 1             Albania MULTIPOLYGON (((19.50115 40...
## 2             Andorra MULTIPOLYGON (((1.439922 42...
## 3             Austria MULTIPOLYGON (((16 48.77775...
## 4             Belgium MULTIPOLYGON (((5 49.79374,...
## 5  Bosnia Herzegovina MULTIPOLYGON (((19.22947 43...
## 6             Croatia MULTIPOLYGON (((14.30038 44...
## 7      Czech Republic MULTIPOLYGON (((14.82523 50...
## 8             Denmark MULTIPOLYGON (((11.99978 54...
## 9             Estonia MULTIPOLYGON (((23.97511 58...
## 10            Finland MULTIPOLYGON (((22.0731 60....
print(myplots[[4]][[2]])
## [[1]]
## mapping:  
## geom_sf: na.rm = FALSE, legend = polygon
## stat_sf: na.rm = FALSE
## position_identity 
## 
## [[2]]
## mapping: fill = ~cluster2 
## geom_sf: na.rm = FALSE, legend = polygon
## stat_sf: na.rm = FALSE
## position_identity

Silver eel

First, let’s select data corresponding to silver stage.

# Select the silver eel stage and drop series-years whose catches are all zero
silver_eel <- subset(res, res$lfs_code=="S")

all_zero <- silver_eel %>%
  group_by(emu_nameshort, lfs_code, hty_code, das_year) %>%
  summarize(S = sum(das_value)) %>%
  filter(S == 0)

# remove every row belonging to an all-zero series-year
silver_eel <- anti_join(silver_eel, all_zero)
## Joining, by = c("das_year", "emu_nameshort", "lfs_code", "hty_code")
# distribution of habitat types among the remaining silver eel rows
table(silver_eel$hty_code)
## 
##   C   F  FC  MO   T 
## 606 961 463 239 354
#We have many data, so we remove "FC" and "FTC" which are weird mixes
silver_eel <- silver_eel %>%
  filter(!hty_code %in% c("FTC", "FC"))

#in this analysis, the unit will correspond to EMU / habitat so we create 
#corresponding column
silver_eel$emu <- silver_eel$emu_nameshort
silver_eel$emu_nameshort <- paste(silver_eel$emu_nameshort,
                                   silver_eel$hty_code, sep="_")


#There are some duplicates for IE_West_F that should be summed up according to
#Russel
summed_up_IE <-silver_eel %>%
  filter(silver_eel$emu_nameshort=="IE_West_F") %>%
  group_by(das_year,das_month) %>%
  summarize(das_value=sum(das_value))

# drop duplicated (year, month, series) rows, keeping the first occurrence
silver_eel <- silver_eel %>% 
  distinct(das_year,das_month,emu_nameshort, .keep_all = TRUE)

# NOTE(review): this assignment assumes the remaining IE_West_F rows appear
# in the same (year, month) order as the grouped summary above -- verify,
# since a different row order would silently misalign the summed values
silver_eel[silver_eel$emu_nameshort=="IE_West_F",
          c("das_year","das_month","das_value") ] <- summed_up_IE

Similarly to seasonality, we will build seasons. We reuse the procedure made for silver eel seasonality, i.e. defining seasons per EMU, with the season starting at the month with minimum landings. The month with lowest catch fmin defines the beginning of the season (month_in_season=1) and season y stands for the 12 months from fmin y (e.g., if lowest migration is in December, the season ranges from December to November, and season y denotes the season from December y to November y+1).

#creating season
silvereel <- do.call("rbind.data.frame",
                     lapply(unique(silver_eel$emu_nameshort),
                            function(s)
                              season_creation(silver_eel[silver_eel$emu_nameshort==s,])))
months_peak_per_series<- unique(silvereel[,c("emu_nameshort","peak_month")])

#large variety in the month with peak of catches among EMU / habitat
kable(table(months_peak_per_series$peak_month),
      col.names=c("month","number of EMUs"),
      caption="number of EMUs peaking in a given months")
number of EMUs peaking in a given months
month number of EMUs
4 1
5 1
6 1
8 1
9 10
10 9
11 4
12 4
#we remove data from season 2020 (incomplete at the time of the analysis)
silvereel <- filter(silvereel, season < 2020)

Looking at the data, it seems that there are few silver eel fisheries in transitional and marine open waters, therefore, we will make an analysis for freshwater and 1 for all other environments.

table(unique(silvereel[,c("hty_code","emu_nameshort")])$hty_code)
## 
##  C  F MO  T 
## 10 16  1  4

marine open, coastal and transitional waters

Data selection

Now we should carry out data selection; more specifically, we want to eliminate rows with too many missing data or too many zeroes, and to check that there are no duplicates (though Cedric already did it).

# keep only the coastal / transitional / marine open series
silvereel_coastal <- subset(silvereel, silvereel$hty_code !="F")
# for each series, keep only the seasons with good coverage of the migration
# wave (good_coverage_wave is a project helper; it prints the months a good
# season should cover, see output below)
kept_seasons <- lapply(unique(silvereel_coastal$emu_nameshort), function(s){
  sub_silver <- subset(silvereel_coastal, silvereel_coastal$emu_nameshort==s)
  kept <- good_coverage_wave(sub_silver)
  #we remove season in which we have less than 50 kg of landings
  if(!is.null(kept))
    kept <- kept[sapply(kept,function(k)
      sum(sub_silver$das_value[sub_silver$season==k],na.rm=TRUE)>50)]
  # normalise "nothing kept" to NULL so the series can be dropped later
  if (length(kept) == 0) kept <- NULL
  kept
})
## [1] "For  DE_Eide_C  a good season should cover months: 5 to 11"
## [1] "For  DE_Eide_T  a good season should cover months: 8 to 11"
## [1] "For  DE_Elbe_T  a good season should cover months: 5 to 11"
## [1] "For  DE_Schl_C  a good season should cover months: 7 to 12"
## [1] "For  DK_total_MO  a good season should cover months: 8 to 12"
## [1] "For  ES_Murc_C  a good season should cover months: 11 to 3"
## [1] "For  FR_Cors_T  a good season should cover months: 9 to 2"
## [1] "For  GB_Angl_C  a good season should cover months: 5 to 11"
## [1] "For GB_Dee_T not possible to define a season"
## [1] "For  GB_NorW_C  a good season should cover months: 5 to 9"
## [1] "For  GB_SouE_C  a good season should cover months: 6 to 11"
## [1] "For  GB_SouW_C  a good season should cover months: 6 to 11"
## [1] "For  GB_Tham_C  a good season should cover months: 5 to 4"
## [1] "For  SE_East_C  a good season should cover months: 7 to 12"
## [1] "For  SE_West_C  a good season should cover months: 5 to 11"

Finally, here are the series kept given previous criterion.

# name the list by series, then display only the series with kept seasons
names(kept_seasons) <- unique(silvereel_coastal$emu_nameshort)
Filter(Negate(is.null), kept_seasons)
## $DE_Eide_C
##  [1] 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018
## 
## $DE_Eide_T
##  [1] 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018
## 
## $DE_Elbe_T
## [1] 2009 2010 2011 2012 2013 2014 2015 2016 2017
## 
## $DE_Schl_C
##  [1] 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018
## 
## $DK_total_MO
##  [1] 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013
## [15] 2014 2015 2016 2017 2018 2019
## 
## $ES_Murc_C
## [1] 2014 2016
## 
## $FR_Cors_T
## [1] 2010 2011 2012 2013 2014 2015 2016 2017
## 
## $GB_Angl_C
## [1] 2014 2015 2016 2017 2018
## 
## $GB_SouE_C
## [1] 2015 2016
## 
## $GB_SouW_C
## [1] 2014 2016 2017 2018
## 
## $SE_East_C
##  [1] 2000 2001 2002 2003 2004 2005 2006 2007 2008 2012 2013 2014 2015 2016
## [15] 2017
## 
## $SE_West_C
## [1] 2000 2001 2002 2003 2004 2005 2006 2007

Data preparation

We carry out the same procedure as for seasonality.

# keep only (series, season) pairs selected above
silvereel_coastal_subset <- subset(silvereel_coastal, 
                           mapply(function(season, series){
                             season %in% kept_seasons[[series]]
                           }, silvereel_coastal$season, silvereel_coastal$emu_nameshort))


# reshape to one row per (series, season), one column per month
silvereel_coastal_wide <- pivot_wider(data=silvereel_coastal_subset[, c("emu_nameshort",
                                                     "cou_code",
                                                     "season",
                                                     "das_month",
                                                     "das_value")],
                                names_from="das_month",
                                values_from="das_value")
# prefix month columns with "m" (m1..m12); the first 3 columns are ids and
# several later steps rely on this column order (indexing with -(1:3))
names(silvereel_coastal_wide)[-(1:3)] <- paste("m",
                                       names(silvereel_coastal_wide)[-(1:3)],
                                       sep="")

###we count the number of zeros per lines to remove lines without enough
###fishes
# per row: number of zero-or-missing months and total catch over the season
data_poor <- data.frame(silvereel_coastal_wide$emu_nameshort,
                        silvereel_coastal_wide$season,
                  zero=rowSums(silvereel_coastal_wide[, -(1:3)] == 0 |
                                 is.na(silvereel_coastal_wide[, -(1:3)])),
           tot=rowSums(silvereel_coastal_wide[, -(1:3)], na.rm=TRUE))
# keep rows with at least 3 informative months and more than 50 kg in total
silvereel_coastal_wide <- silvereel_coastal_wide[data_poor$zero < 10 
                                                   & data_poor$tot>50, ]

table_datapoor(data_poor %>% filter(zero > 9 | tot<50)) #we remove seasons with 2 or fewer informative months or less than 50 kg
“data poor”" situation
EMU season number of zero total catch
DE_Eide_T 2018 10 126.5
ES_Murc_C 2014 10 3299.0
GB_Angl_C 2015 10 303.5
GB_Angl_C 2016 10 62.0
GB_Angl_C 2017 10 149.0
GB_Angl_C 2018 10 149.0

It leads to a dataset with 97 rows.

We now replace NA value per zero since we selected our dataseries with missing months corresponding to insignificant months / closed months, and we compute proportions per month for each year.

# NA months correspond to closed / insignificant months (selection above),
# so treat them as zero catches; setNames replaces the verbose 12-entry list
silvereel_coastal_wide <- silvereel_coastal_wide %>%
  replace_na(replace=setNames(as.list(rep(0, 12)), paste0("m", 1:12)))
# add a tiny constant so no month has an exactly-zero proportion
# (presumably required by the model likelihood — zeros would be degenerate)
silvereel_coastal_wide[, -(1:3)] <- silvereel_coastal_wide[, -(1:3)] + 1e-3
# convert monthly catches into proportions of the season total;
# across() replaces the superseded mutate_at()
total_catch_year <- rowSums(silvereel_coastal_wide[, paste0("m", 1:12)])
silvereel_coastal_wide <- silvereel_coastal_wide %>%
  mutate(across(all_of(paste0("m", 1:12)), ~ .x / total_catch_year))

The Commission asks us to compare the pattern before and after 2007, probably to see the effect of the Eel Regulation. It is therefore necessary to build a period index. However, since most countries implemented their EMPs only in 2009/2010, we split in 2010.

# period 1 = seasons up to 2009 (before most EMPs), period 2 = 2010 onwards
silvereel_coastal_wide$period <- ifelse(silvereel_coastal_wide$season>2009,
                                  2,
                                  1)

kable(table(silvereel_coastal_wide$period,
       silvereel_coastal_wide$emu_nameshort),
      row.names=TRUE,
      caption="number of seasons per EMU and period")
number of seasons per EMU and period
DE_Eide_C DE_Eide_T DE_Elbe_T DE_Schl_C DK_total_MO ES_Murc_C FR_Cors_T GB_Angl_C GB_SouE_C GB_SouW_C SE_East_C SE_West_C
1 1 1 1 1 10 0 0 0 0 0 9 8
2 9 8 8 9 10 1 8 1 2 4 6 0

The situation is not well balanced. Most EMUs that have data in period 1 don't have data in period 2, and conversely.

Running the model

# one integer group id per EMU/habitat x period combination (unused
# combinations dropped); these groups are the units clustered by the model
group <- as.integer(interaction(silvereel_coastal_wide$emu_nameshort,
                                            silvereel_coastal_wide$period,
                                            drop=TRUE))
nb_occ_group <- table(group)
# observation matrix: one row per (series, season), columns = monthly proportions
y <-as.matrix(silvereel_coastal_wide[, paste("m", 1:12, sep="")])

Now, we make a loop to select the number of clusters based on a DIC criterion

# fit the model for K = 2..7 clusters in parallel (one JAGS run per K)
cl <- makeCluster(3, 'FORK')
comparison <- parSapply(cl,2:7,
       function(nbclus){
         mydata <- build_data(nbclus)
         adapted <- FALSE
         # JAGS adaptation can fail; keep restarting the run until it succeeds
         while (!adapted){
           tryCatch({
             runjags.options(adapt.incomplete="error")
             res <- run.jags("jags_model.txt", monitor= c("deviance",
                                                          "alpha_group",
                                                          "cluster"),
                        summarise=FALSE, adapt=40000, method="parallel",
                        sample=2000,burnin=100000,n.chains=1,
                        inits=generate_init(nbclus, mydata)[[1]],
                        data=mydata)
                        adapted <- TRUE
                        res_mat <- as.matrix(as.mcmc.list(res))
                        # median silhouette over draws: cluster separation
                        silhouette <- median(compute_silhouette(res_mat))
                        # number of clusters actually populated at each draw
                        nbused <- apply(res_mat, 1, function(iter){
                          length(table(iter[grep("cluster",
                                                 names(iter))]))
                        })
                        # DIC with pD approximated by half the variance of
                        # the deviance; column 1 is the monitored deviance
                        dic <- mean(res_mat[,1])+0.5*var(res_mat[,1])
                        stats <- c(dic,silhouette,mean(nbused))
                  }, error=function(e) {
                    print(paste("not adapted, restarting nbclus",nbclus))
                    }
                  )
         }
         stats
      })
stopCluster(cl)
# one column per K: DIC, silhouette, mean number of clusters used
best_silvereel_coastal_landings <- data.frame(nbclus=2:(ncol(comparison)+1),
                                              dic=comparison[1, ],
                                              silhouette=comparison[2, ],
                                              used=comparison[3,])
# save then reload: lets the report be re-knit without rerunning the MCMC
save(best_silvereel_coastal_landings, file="silvereel_coastal_landings_jags.rdata")
load("silvereel_coastal_landings_jags.rdata")
best_silvereel_coastal_landings
##   nbclus       dic silhouette  used
## 1      2 -20691.05 0.38812189 2.000
## 2      3 -21422.06 0.26739341 3.000
## 3      4 -21465.02 0.22607521 4.000
## 4      5 -21481.29 0.24429729 4.000
## 5      6 -21578.89 0.24482807 4.000
## 6      7 -21648.42 0.05884515 6.001

4 seem to be a good compromise (though only 3 clusters seem to be effectively used)

# refit the retained model (K = 4), monitoring every quantity needed for the
# result tables, plots and exports; restart until JAGS adaptation succeeds.
# The monitor vector previously listed "cluster" and "centroid" twice; the
# duplicates are removed (same monitored set).
nbclus <- 4
mydata <-build_data(4)
adapted <- FALSE
while (!adapted){
   tryCatch({
      runjags.options(adapt.incomplete="error")
      myfit_silvereel_coastal_landings <- run.jags("jags_model.txt",
                      monitor= c("cluster", "esp", "alpha_group",
                                 "centroid", "centroid_group",
                                 "distToClust", "duration_clus",
                                 "duration_group",
                                 "lambda", "id_cluster"),
                      summarise=FALSE, adapt=20000, method="parallel",
                      sample=10000,burnin=200000,n.chains=1, thin=5,
                      inits=generate_init(nbclus, mydata)[[1]], data=mydata)
      adapted <- TRUE
    }, error=function(e) {
       print(paste("not adapted, restarting nbclus",nbclus))
    })
}


save(myfit_silvereel_coastal_landings, best_silvereel_coastal_landings,
     file="silvereel_coastal_landings_jags.rdata")

Results

Once fitted, we can plot monthly pattern per cluster

# reload the saved fit so results can be rebuilt without refitting
load("silvereel_coastal_landings_jags.rdata")
nbclus <- 4
mydata <-build_data(4)
# Extract the posterior monthly pattern ("esp" parameters) of each cluster
# from a runjags fit, in long format: one row per MCMC draw, cluster, month.
# res:  a runjags fit object
# type: only "cluster" is implemented; other values now fail fast instead of
#       hitting a confusing "object 'sub_mat' not found" error (original bug)
# returns a data.frame with columns param, proportion, cluster, month
get_pattern_month <- function(res,type="cluster"){
  type <- match.arg(type, "cluster")
  res_mat <- as.matrix(as.mcmc.list(res, add.mutate=FALSE))
  sub_mat <- as.data.frame(res_mat[,grep("esp",colnames(res_mat))])
  sub_mat <- sub_mat %>% 
    pivot_longer(cols=1:ncol(sub_mat),
                 names_to="param",
                 values_to="proportion")
  # parameter names look like "esp[c,m]": splitting on punctuation yields the
  # cluster index as 2nd token and the month index as 3rd token
  tmp <- lapply(as.character(sub_mat$param),function(p) strsplit(p,"[[:punct:]]"))
  sub_mat$cluster<-as.factor(
    as.integer(lapply(tmp, function(tt) tt[[1]][2])))
  sub_mat$month <- as.character(lapply(tmp,
                                       function(tt) paste("m",
                                                          tt[[1]][3],
                                                          sep="")))
  # order months m1..m12 for plotting
  sub_mat$month <- factor(sub_mat$month, levels=paste("m", 1:12, sep=""))
  sub_mat
}

pat <-get_pattern_month(myfit_silvereel_coastal_landings)
# relabel raw cluster ids (1,3,4,2 become 1,2,3,4); levels go up to 7 so the
# colour scale is shared with the other stage analyses
pat$cluster <- factor(match(pat$cluster,c("1","3","4","2") ),
                      levels=as.character(1:7))
ggplot(pat,aes(x=month,y=proportion))+
  geom_boxplot(aes(fill=cluster),outlier.shape=NA)+facet_wrap(.~cluster, ncol=1) +
  theme_bw()

Clusters 3 and 4 correspond to peak in october with 3 more widespread. Cluster 1 corresponds to a peak in autumn/winter. Cluster 2 corresponds to catches in winter.

We compute some statistics to characterize the clusters.

table_characteristics(myfit_silvereel_coastal_landings, 4)
characteristics of clusters
number of months to reach 80% of total
month of centroid
cluster median q2.5% q97.5% median q2.5% q97.5%
1 3 3 4 0.20 0.04 0.37
2 2 2 3 9.60 9.51 9.73
3 3 3 4 1.90 1.48 2.33
4 4 4 5 9.31 9.16 9.39

Duration indicates the minimum number of months that covers 80% of the wave (1st column is the median, and the 2 next one quantiles 2.5% and 97.5% of credibility intervals). Centroid is the centroid of the migration wave (e.g. 11.5 would indicate a migration centred around mid november). The first column is the median and the two next one the quantiles 2.5 and 97.5%.

We can also look at the belonging of the different groups.

# group labels "series.period", in the same order as the model's group index
groups <- interaction(silvereel_coastal_wide$emu_nameshort,
                                            silvereel_coastal_wide$period,
                                            drop=TRUE)
group_name <- levels(groups)

# NOTE(review): this redefines get_pattern_month with a different meaning
# (cluster membership per group rather than monthly pattern); a distinct name
# would be clearer but would break the existing call below. The 'mydata'
# argument is unused; the function relies on the globals group_name, groups
# and nbclus.
get_pattern_month <- function(res,mydata){
  
  # split "series.period" labels back into series and period
  tmp <- strsplit(as.character(group_name),
                  "\\.")
  ser <- as.character(lapply(tmp,function(tt){
    tt[1]
  }))
  period <- as.character(lapply(tmp,function(tt){
    tt[2]
  }))
  res_mat <- as.matrix(as.mcmc.list(res,add.mutate=FALSE))
  
  # for each group: the most frequent cluster over MCMC draws, followed by
  # the number of draws falling in each of the nbclus clusters
  clus <- t(sapply(seq_len(length(unique(groups))), function(id){
    name_col <- paste("cluster[",id,"]",sep="")
    freq <- table(res_mat[,name_col])
    max_class <- names(freq)[order(freq,decreasing=TRUE)[1]]
    c(max_class,freq[as.character(1:nbclus)])
  }))
  storage.mode(clus) <- "numeric"
  classes <- as.data.frame(clus)
  names(classes) <- c("cluster", paste("clus",seq_len(nbclus),sep=""))
  cbind.data.frame(data.frame(ser=ser, period=period),
                   classes)
}

myclassif <- get_pattern_month(myfit_silvereel_coastal_landings)
# apply the same cluster relabelling (1,3,4,2 -> 1,2,3,4) as for the patterns
myclassif$cluster <- factor(match(myclassif$cluster,c("1","3","4","2") ),
                      levels=as.character(1:7))
table_classif(myclassif)
EMU period Max cluster % clus 1 % clus 2 % clus 3 % clus 4
FR_Cors_T 2 1 100 0 0 0
ES_Murc_C 2 2 0 0 100 0
DE_Eide_C 1 3 0 0 0 100
DE_Eide_C 2 3 0 0 0 100
DE_Elbe_T 1 3 0 0 0 100
DE_Elbe_T 2 3 0 0 0 100
DE_Schl_C 1 3 0 12 0 88
DE_Schl_C 2 3 0 0 0 100
DK_total_MO 1 3 0 0 0 100
DK_total_MO 2 3 0 9 0 91
SE_East_C 1 3 0 0 0 100
SE_East_C 2 3 0 0 0 100
SE_West_C 1 3 0 0 0 100
DE_Eide_T 1 4 0 100 0 0
DE_Eide_T 2 4 0 100 0 0
GB_Angl_C 2 4 0 100 0 0
GB_SouE_C 2 4 0 100 0 0
GB_SouW_C 2 4 0 100 0 0

In fact, most EMUs fall in cluster 3. Cluster 2 corresponds only to ES_Murc_C (same as for yellow eel) and cluster 1 only to FR_Cors_T. Cluster 4 (limited fishing season) regroups GB and DE EMUs.

# map the most likely cluster per EMU, one pair of maps (period 1 / period 2)
# per habitat type
myplots <-lapply(c("MO","C","T"),function(hty){
  myclassif_p1 <- subset(myclassif, myclassif$period == 1 &
                           endsWith(as.character(myclassif$ser),
                                    hty))
  myclassif_p2 <- subset(myclassif, myclassif$period == 2 &
                           endsWith(as.character(myclassif$ser),
                                    hty))
  # join cluster labels onto the emu sf layer by stripping the habitat suffix
  # from the series name to recover the short EMU name
  emu$cluster1 <- factor(myclassif_p1$cluster[match(emu$name_short,                                                  gsub(paste("_",hty,sep=""),"",myclassif_p1$ser))],
                       levels=1:7)
  emu$cluster2 <- factor(myclassif_p2$cluster[match(emu$name_short,                                                gsub(paste("_",hty,sep=""),"",myclassif_p2$ser))],
                       levels=1:7)
  p1 <- ggplot(data = cou) +  geom_sf(fill= "antiquewhite") +
          geom_sf(data=emu,aes(fill=cluster1)) + scale_fill_manual(values=cols)+
      theme_bw() +xlim(-20,30) + ylim(35,65) +
    ggtitle(paste("period 1",hty))
  p2 <- ggplot(data = cou) +  geom_sf(fill= "antiquewhite") +
          geom_sf(data=emu,aes(fill=cluster2)) + scale_fill_manual(values=cols)+
    theme_bw() +xlim(-20,30) + ylim(35,65)  +
    ggtitle(paste("period 2",hty))
  return(list(p1,p2))
})
# only the C ([[2]]) and T ([[3]]) maps are displayed; MO ([[1]]) has a
# single series (see habitat table above)
print(myplots[[2]][[1]])

print(myplots[[2]][[2]])

print(myplots[[3]][[1]])

print(myplots[[3]][[2]])

Exporting pattern per group

# full posterior matrix, reused by the similarity and closure analyses below
tmp <- as.matrix(as.mcmc.list(myfit_silvereel_coastal_landings))
name_col = colnames(tmp)

# export the median monthly pattern of each group for later syntheses
pattern_Smar_coast_trans_landings=do.call("rbind.data.frame",
                                lapply(seq_len(length(levels(groups))), function(g)
                                   median_pattern_group(g, group_name,tmp, "S","landings")))
save(pattern_Smar_coast_trans_landings,file="pattern_Smar_coast_trans_landings.rdata")

Similarity before and after 2010

#which groups have data in both periods
occ=table(unique(silvereel_coastal_wide[,c("emu_nameshort", "period")])[,1])
tocompare=names(occ)[which(occ>1)]

# similarity = posterior of the overlap between the two monthly patterns:
# sum over months of min(pattern period 1, pattern period 2); 1 means
# identical patterns, 0 means no overlap at all
simi=sapply(tocompare, function(s){
  g=grep(s,group_name)
  esp1=tmp[,grep(paste("alpha_group\\[",g[1],",",sep=""),name_col)]
  esp2=tmp[,grep(paste("alpha_group\\[",g[2],",",sep=""),name_col)]
  quantile(apply(cbind(esp1,esp2),
                 1,
                 function(x) sum(pmin(x[1:12],x[13:24]))),
           probs=c(0.025,.5,.975))
})

similarity=data.frame(emu=tocompare,t(simi))

table_similarity(similarity)
similarity
EMU q2.5% median q97.5%
DE_Eide_C 0.56 0.71 0.85
DE_Eide_T 0.52 0.68 0.84
DE_Elbe_T 0.60 0.76 0.88
DE_Schl_C 0.60 0.76 0.88
DK_total_MO 0.82 0.90 0.95
SE_East_C 0.77 0.86 0.93

Potential effect of EMP and EU closures

# recover the period (last character of the label), the EMU name and the
# habitat code from "CC_name_HTY.period" group labels
ncar=nchar(group_name)
period=as.integer(substr(as.character(group_name),ncar,ncar))
blocks=strsplit(group_name,"_")
emus=sapply(blocks,function(x)paste(x[1],x[2],sep="_"))
# third block is "HTY.p": drop the trailing ".p" to keep the habitat code
hty_code=sapply(blocks,function(x) substr(x[3],1,nchar(x[3])-2))



#######EMP
# period-1 groups: candidates to estimate the effect of EMP closures
list_period1=data.frame(emu_nameshort=emus[period==1])
list_period1$group=group_name[period==1]
list_period1$id_g=match(list_period1$group,group_name)
list_period1$hty_code=hty_code[period==1]
  
#we check that we have landings data at least two years before the first EMP
#closures
# first condition: an EMP closure exists for this EMU / stage S / habitat
list_period1$estimable=mapply(function(s,hty) {
  length(which(charac_EMP_closures$emu_nameshort==s 
               & grepl("S",charac_EMP_closures$lfs_code) 
               & grepl(hty, charac_EMP_closures$hty_code)))>0},
  list_period1$emu_nameshort, list_period1$hty_code)

# second condition: the series starts at least 2 years before the closure
list_period1$estimable=list_period1$estimable &
(sapply(list_period1$id_g,function(e) min(silvereel_coastal_wide$season[group==e]))+2 <
mapply(function(e,hty) min(charac_EMP_closures$year[charac_EMP_closures$emu_nameshort==e &
                                                           grepl("S",charac_EMP_closures$lfs_code) &
                                                    grepl(hty,charac_EMP_closures$hty_code)]),
       list_period1$emu_nameshort, list_period1$hty_code))

# quantiles of the proportion of catch potentially lost (filled below)
list_period1$lossq2.5=NA
list_period1$lossq50=NA
list_period1$lossq97.5=NA

# For each estimable period-1 group, posterior proportion of the annual catch
# falling within the EMP closure months, weighted by the closure percentage:
# the catch potentially lost because of the closures.
res_closures=mapply(function(s,g,hty) {
  # one closure percentage per month (max over overlapping closures)
  emu_closures <- EMP_closures %>%
    filter(emu_nameshort==s & grepl("S",lfs_code) & grepl(hty, hty_code)) %>%
    group_by(emu_nameshort,month) %>%
    summarize(fishery_closure_percent=max(fishery_closure_percent))
  myalpha=tmp[,paste("alpha_group[",g,",",emu_closures$month,"]",sep="")]
  if (nrow(emu_closures)>1){
    loss=colSums(apply(myalpha,1,function(x) x*emu_closures$fishery_closure_percent/100))
  } else {
    loss=myalpha*emu_closures$fishery_closure_percent/100
  }
  quantile(loss,probs=c(0.025,.5,.975))
},as.character(list_period1$emu_nameshort[list_period1$estimable]),
list_period1$id_g[list_period1$estimable],
# bug fix: was list_period1$hty, which only worked via data.frame partial
# matching; spelled out as hty_code (consistent with the EU block below)
list_period1$hty_code[list_period1$estimable])

list_period1[list_period1$estimable, c("lossq2.5", "lossq50","lossq97.5")] =
  t(res_closures)

kable(list_period1[,c("emu_nameshort","lossq2.5","lossq50","lossq97.5")],
      col.names=c("emu","q2.5","median","q97.5"),
      caption="proportion of catch potentially lost because of EMP closure",
      digits=2)
proportion of catch potentially lost because of EMP closure
emu q2.5 median q97.5
DE_Eide NA NA NA
DE_Eide NA NA NA
DE_Elbe NA NA NA
DE_Schl NA NA NA
DK_total NA NA NA
SE_East NA NA NA
SE_West NA NA NA
#######EU
# period-2 groups: candidates to estimate the effect of the EU closures
list_period2=data.frame(emu_nameshort=emus[period==2])
list_period2$group=group_name[period==2]
list_period2$id_g=match(list_period2$group,group_name)
list_period2$hty_code=hty_code[period==2]
  
#we check that we have landings data at least two years before the first EU
#closures
# first condition: an EU closure exists for this EMU / stage S / habitat
list_period2$estimable=mapply(function(s,hty) {
  length(which(charac_EU_closures$emu_nameshort==s 
               & grepl("S",charac_EU_closures$lfs_code) 
               & grepl(hty, charac_EU_closures$hty_code)))>0},
  list_period2$emu_nameshort, list_period2$hty_code)

# second condition: the series starts at least 2 years before the closure
list_period2$estimable=list_period2$estimable &
(sapply(list_period2$id_g,function(e) min(silvereel_coastal_wide$season[group==e]))+2 <
mapply(function(e,hty) min(charac_EU_closures$year[charac_EU_closures$emu_nameshort==e &
                                                           grepl("S",charac_EU_closures$lfs_code) &
                                                    grepl(hty,charac_EU_closures$hty_code)]),
       list_period2$emu_nameshort, list_period2$hty_code))

# quantiles of the proportion of catch potentially lost (filled below)
list_period2$lossq2.5=NA
list_period2$lossq50=NA
list_period2$lossq97.5=NA

# same computation as for the EMP closures, using the EU closure table
res_closures=mapply(function(s,g,hty) {
  # one closure percentage per month (max over overlapping closures)
  emu_closures <- EU_closures %>%
    filter(emu_nameshort==s & grepl("S", lfs_code) & grepl(hty,hty_code)) %>%
    group_by(emu_nameshort,month) %>%
    summarize(fishery_closure_percent=max(fishery_closure_percent))
  myalpha=tmp[,paste("alpha_group[",g,",",emu_closures$month,"]",sep="")]
  if (nrow(emu_closures)>1){
    loss=colSums(apply(myalpha,1,function(x) x*emu_closures$fishery_closure_percent/100))
  } else {
    loss=myalpha*emu_closures$fishery_closure_percent/100
  }
  quantile(loss,probs=c(0.025,.5,.975))
},as.character(list_period2$emu_nameshort[list_period2$estimable]),
list_period2$id_g[list_period2$estimable],
list_period2$hty_code[list_period2$estimable])

list_period2[list_period2$estimable, c("lossq2.5", "lossq50","lossq97.5")] =
  t(res_closures)

kable(list_period2[,c("emu_nameshort","lossq2.5","lossq50","lossq97.5")],
      col.names=c("emu","q2.5","median","q97.5"),
      caption="proportion of catch potentially lost because of EU closure",
      digits=2)
proportion of catch potentially lost because of EU closure
emu q2.5 median q97.5
DE_Eide 0.02 0.03 0.04
DE_Eide NA NA NA
DE_Elbe 0.01 0.02 0.03
DE_Schl 0.03 0.05 0.07
DK_total 0.25 0.32 0.39
ES_Murc NA NA NA
FR_Cors NA NA NA
GB_Angl 0.00 0.02 0.05
GB_SouE 0.00 0.01 0.03
GB_SouW 0.00 0.01 0.02
SE_East 0.11 0.18 0.25
# stack both closure types and save for the cross-stage synthesis
list_period2$type="EU closure"
list_period1$type="EMP closure"
list_period=rbind.data.frame(list_period1,list_period2)
list_period$stage="S"
save(list_period,file="loss_silvercoastal.rdata")

freshwater waters

Data selection

Now we should carry out data selection; more specifically, we want to eliminate rows with too many missing data or too many zeroes, and to check that there are no duplicates (though Cedric already did it).

# keep only the freshwater series
silvereel_freshwater <- subset(silvereel, silvereel$hty_code =="F")
# same season-coverage selection as for the coastal series above
kept_seasons <- lapply(unique(silvereel_freshwater$emu_nameshort), function(s){
  sub_silver <- subset(silvereel_freshwater, silvereel_freshwater$emu_nameshort==s)
  kept <- good_coverage_wave(sub_silver)
  #we remove season in which we have less than 50 kg of landings
  if(!is.null(kept))
    kept <- kept[sapply(kept,function(k)
      sum(sub_silver$das_value[sub_silver$season==k],na.rm=TRUE)>50)]
  # normalise "nothing kept" to NULL so the series can be dropped later
  if (length(kept) == 0) kept <- NULL
  kept
})
## [1] "For  DE_Eide_F  a good season should cover months: 5 to 10"
## [1] "For  DE_Elbe_F  a good season should cover months: 5 to 11"
## [1] "For  DE_Schl_F  a good season should cover months: 4 to 11"
## [1] "For  DE_Warn_F  a good season should cover months: 3 to 10"
## [1] "For  FR_Loir_F  a good season should cover months: 10 to 2"
## [1] "For  GB_Angl_F  a good season should cover months: 7 to 12"
## [1] "For  GB_Dee_F  a good season should cover months: 6 to 11"
## [1] "For  GB_Humb_F  a good season should cover months: 7 to 12"
## [1] "For GB_NorW_F not possible to define a season"
## [1] "For  GB_SouE_F  a good season should cover months: 7 to 12"
## [1] "For  GB_SouW_F  a good season should cover months: 6 to 11"
## [1] "For  GB_Tham_F  a good season should cover months: 4 to 10"
## [1] "For GB_Wale_F not possible to define a season"
## [1] "For IE_East_F not possible to define a season"
## [1] "For IE_West_F not possible to define a season"
## [1] "For  SE_Inla_F  a good season should cover months: 5 to 12"

Finally, here are the series kept given previous criterion.

# name the list by series and display only the series with kept seasons
names(kept_seasons) <- unique(silvereel_freshwater$emu_nameshort)
kept_seasons[!sapply(kept_seasons,is.null)]
## $DE_Eide_F
## [1] 2009 2010 2011 2012 2013 2014
## 
## $DE_Elbe_F
##  [1] 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018
## 
## $DE_Schl_F
##  [1] 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018
## 
## $DE_Warn_F
## [1] 2010 2011 2012 2013 2014 2015 2016 2017 2018
## 
## $FR_Loir_F
## [1] 2005 2007 2008 2009 2010 2013 2016 2017
## 
## $GB_Angl_F
## [1] 2014 2015 2016 2017 2018
## 
## $GB_Humb_F
## [1] 2015 2016
## 
## $GB_SouE_F
## [1] 2014 2015
## 
## $GB_SouW_F
## [1] 2014 2015 2016
## 
## $GB_Tham_F
## [1] 2013 2014 2015 2017
## 
## $SE_Inla_F
## [1] 2006 2013 2014 2015 2016 2017 2018

Data preparation

We carry out the same procedure as for seasonality.

# keep only (series, season) pairs selected above
silvereel_freshwater_subset <- subset(silvereel_freshwater, 
                           mapply(function(season, series){
                             season %in% kept_seasons[[series]]
                           }, silvereel_freshwater$season, silvereel_freshwater$emu_nameshort))


# reshape to one row per (series, season), one column per month
silvereel_freshwater_wide <- pivot_wider(data=silvereel_freshwater_subset[, c("emu_nameshort",
                                                     "cou_code",
                                                     "season",
                                                     "das_month",
                                                     "das_value")],
                                names_from="das_month",
                                values_from="das_value")
# prefix month columns with "m"; the first 3 columns are ids (-(1:3) below)
names(silvereel_freshwater_wide)[-(1:3)] <- paste("m",
                                       names(silvereel_freshwater_wide)[-(1:3)],
                                       sep="")

###we count the number of zeros per lines to remove lines without enough
###fishes
data_poor <- data.frame(silvereel_freshwater_wide$emu_nameshort,
                        silvereel_freshwater_wide$season,
                  zero=rowSums(silvereel_freshwater_wide[, -(1:3)] == 0 |
                                 is.na(silvereel_freshwater_wide[, -(1:3)])),
           tot=rowSums(silvereel_freshwater_wide[, -(1:3)], na.rm=TRUE))
# keep rows with at least 3 informative months and more than 50 kg in total
silvereel_freshwater_wide <- silvereel_freshwater_wide[data_poor$zero < 10 
                                                   & data_poor$tot>50, ]
table_datapoor(data_poor %>% filter(zero > 9 | tot<50)) #we remove seasons with 2 or fewer informative months or less than 50 kg
“data poor”" situation
EMU season number of zero total catch
GB_SouW_F 2015 10 71

It leads to a dataset with 65 rows.

We now replace NA value per zero since we selected our dataseries with missing months corresponding to insignificant months / closed months, and we compute proportions per month for each year.

# NA months correspond to closed / insignificant months (selection above),
# so treat them as zero catches; setNames replaces the verbose 12-entry list
silvereel_freshwater_wide <- silvereel_freshwater_wide %>%
  replace_na(replace=setNames(as.list(rep(0, 12)), paste0("m", 1:12)))
# add a tiny constant so no month has an exactly-zero proportion
silvereel_freshwater_wide[, -(1:3)] <- silvereel_freshwater_wide[, -(1:3)] + 1e-3
# convert monthly catches into proportions of the season total;
# across() replaces the superseded mutate_at()
total_catch_year <- rowSums(silvereel_freshwater_wide[, paste0("m", 1:12)])
silvereel_freshwater_wide <- silvereel_freshwater_wide %>%
  mutate(across(all_of(paste0("m", 1:12)), ~ .x / total_catch_year))

The Commission asks us to compare the pattern before and after 2007, probably to see the effect of the Eel Regulation. It is therefore necessary to build a period index. However, since most countries implemented their EMPs only in 2009/2010, we split in 2010.

# period 1 = seasons up to 2009 (before most EMPs), period 2 = 2010 onwards
silvereel_freshwater_wide$period <- ifelse(silvereel_freshwater_wide$season>2009,
                                  2,
                                  1)

kable(table(silvereel_freshwater_wide$period,
       silvereel_freshwater_wide$emu_nameshort),
      row.names=TRUE,
      caption="number of season per EMU and period")
number of season per EMU and period
DE_Eide_F DE_Elbe_F DE_Schl_F DE_Warn_F FR_Loir_F GB_Angl_F GB_Humb_F GB_SouE_F GB_SouW_F GB_Tham_F SE_Inla_F
1 1 1 1 0 4 0 0 0 0 0 1
2 5 9 9 9 4 5 2 2 2 4 6

The situation is not well balanced. Most EMU have data only after 2010.

Running the model

# one integer group id per EMU x period combination; units clustered by the model
group <- as.integer(interaction(silvereel_freshwater_wide$emu_nameshort,
                                            silvereel_freshwater_wide$period,
                                            drop=TRUE))
nb_occ_group <- table(group)
# observation matrix: one row per (series, season), columns = monthly proportions
y <-as.matrix(silvereel_freshwater_wide[, paste("m", 1:12, sep="")])

Now, we make a loop to select the number of clusters based on a DIC criterion

# fit the model for K = 2..7 clusters in parallel (one JAGS run per K);
# same procedure as for the coastal series
cl <- makeCluster(3, 'FORK')
comparison <- parSapply(cl,2:7,
       function(nbclus){
         mydata <- build_data(nbclus)
         adapted <- FALSE
         # JAGS adaptation can fail; keep restarting the run until it succeeds
         while (!adapted){
           tryCatch({
             runjags.options(adapt.incomplete="error")
             res <- run.jags("jags_model.txt", monitor= c("deviance",
                                                          "alpha_group",
                                                          "cluster"),
                        summarise=FALSE, adapt=40000, method="parallel",
                        sample=2000,burnin=100000,n.chains=1,
                        inits=generate_init(nbclus, mydata)[[1]],
                        data=mydata)
                        adapted <- TRUE
                        res_mat <- as.matrix(as.mcmc.list(res))
                        # median silhouette over draws: cluster separation
                        silhouette <- median(compute_silhouette(res_mat))
                        # number of clusters actually populated at each draw
                        nbused <- apply(res_mat, 1, function(iter){
                          length(table(iter[grep("cluster",
                                                 names(iter))]))
                        })
                        # DIC with pD approximated by half the variance of
                        # the deviance; column 1 is the monitored deviance
                        dic <- mean(res_mat[,1])+0.5*var(res_mat[,1])
                        stats <- c(dic,silhouette,mean(nbused))
                  }, error=function(e) {
                    print(paste("not adapted, restarting nbclus",nbclus))
                    }
                  )
         }
         stats
      })
stopCluster(cl)
# one column per K: DIC, silhouette, mean number of clusters used
best_silvereel_freshwater_landings <- data.frame(nbclus=2:(ncol(comparison)+1),
                                              dic=comparison[1, ],
                                              silhouette=comparison[2, ],
                                              used=comparison[3,])
# save then reload: lets the report be re-knit without rerunning the MCMC
save(best_silvereel_freshwater_landings, file="silvereel_freshwater_landings_jags.rdata")
load("silvereel_freshwater_landings_jags.rdata")
best_silvereel_freshwater_landings
##   nbclus       dic silhouette used
## 1      2 -13200.19  0.1929929    2
## 2      3 -14007.27  0.2923097    3
## 3      4 -13931.64  0.2272654    4
## 4      5 -14170.32  0.1714372    5
## 5      6 -14151.40  0.1501752    5
## 6      7 -14086.58  0.1517780    7

5 seem to be a good compromise: slight decrease in silhouette, but all clusters are used and DIC is good.

# refit the retained model (K = 5), monitoring every quantity needed for the
# result tables and plots; restart until JAGS adaptation succeeds.
# The monitor vector previously listed "cluster" and "centroid" twice; the
# duplicates are removed (same monitored set).
nbclus <- 5
mydata <-build_data(5)
adapted <- FALSE
while (!adapted){
   tryCatch({
      runjags.options(adapt.incomplete="error")
      myfit_silvereel_freshwater_landings <- run.jags("jags_model.txt",
                      monitor= c("cluster", "esp", "alpha_group",
                                 "centroid", "centroid_group",
                                 "distToClust", "duration_clus",
                                 "duration_group",
                                 "lambda", "id_cluster"),
                      summarise=FALSE, adapt=20000, method="parallel",
                      sample=10000,burnin=200000,n.chains=1, thin=5,
                      inits=generate_init(nbclus, mydata)[[1]], data=mydata)
      adapted <- TRUE
    }, error=function(e) {
       print(paste("not adapted, restarting nbclus",nbclus))
    })
}


save(myfit_silvereel_freshwater_landings, best_silvereel_freshwater_landings,
     file="silvereel_freshwater_landings_jags.rdata")

Results

Once fitted, we can plot monthly pattern per cluster

# Reload the saved fit and rebuild the data used for the 5-cluster model
load("silvereel_freshwater_landings_jags.rdata")
nbclus <- 5
mydata <- build_data(nbclus)
get_pattern_month <- function(res, type="cluster"){
  # Extract the monthly pattern (the "esp" parameters) from a runjags fit
  # and reshape it into a long data.frame with columns
  # param / proportion / cluster / month.
  #
  # res:  a runjags fit object
  # type: kind of pattern to extract; only "cluster" is implemented
  res_mat <- as.matrix(as.mcmc.list(res, add.mutate=FALSE))
  if (type=="cluster"){
    sub_mat <- as.data.frame(res_mat[,grep("esp",colnames(res_mat))])
  } else {
    # BUG FIX: the original silently left sub_mat undefined for any other
    # type, which raised an obscure "object not found" error below;
    # fail early with a clear message instead
    stop("get_pattern_month: unsupported type '", type, "'")
  }
  sub_mat <- sub_mat %>% 
    pivot_longer(cols=1:ncol(sub_mat),
                 names_to="param",
                 values_to="proportion")
  # parameter names look like esp[cluster,month]: split on punctuation to
  # recover the cluster and month indices
  tmp <- lapply(as.character(sub_mat$param),function(p) strsplit(p,"[[:punct:]]"))
  sub_mat$cluster<-as.factor(
    as.integer(lapply(tmp, function(tt) tt[[1]][2])))
  sub_mat$month <- as.character(lapply(tmp,
                                       function(tt) paste("m",
                                                          tt[[1]][3],
                                                          sep="")))
  # order the month factor m1..m12 (m1 = first month of the season)
  sub_mat$month <- factor(sub_mat$month, levels=paste("m", 1:12, sep=""))
  sub_mat
}

# Relabel the raw cluster ids (4,1,5,2,3) -> (1..5) so colours and facets
# follow a consistent display order, then plot the monthly pattern per
# cluster.
pat <- get_pattern_month(myfit_silvereel_freshwater_landings)
relabel <- c("4", "1", "5", "2", "3")
pat$cluster <- factor(match(pat$cluster, relabel),
                      levels = as.character(1:7))
ggplot(pat, aes(x = month, y = proportion)) +
  geom_boxplot(aes(fill = cluster), outlier.shape = NA) +
  scale_fill_manual(values = cols) +
  facet_wrap(. ~ cluster, ncol = 1) +
  theme_bw()

Cluster 4 peaks in summer with a second peak in December, 5 in winter, 2 in summer. Clusters 1 and 3 are bimodal (spring and autumn).

We compute some statistics to characterize the clusters.

# Per-cluster summaries: number of months needed to reach 80% of the annual
# catch, and centroid month of the wave (median and 95% credibility bounds)
table_characteristics(myfit_silvereel_freshwater_landings, 5)
characteristics of clusters
number of months to reach 80% of total
month of centroid
cluster median q2.5% q97.5% median q2.5% q97.5%
1 5 5 6 10.54 10.20 11.08
2 4 3 4 9.98 9.83 10.13
3 3 3 4 11.69 11.53 11.86
4 5 4 7 5.86 4.68 7.04
5 5 5 6 7.99 7.85 8.12

Duration indicates the minimum number of months that covers 80% of the wave (the first column is the median, the next two the 2.5% and 97.5% quantiles of the credibility interval). Centroid is the centroid of the migration wave (e.g. 11.5 would indicate a migration centred around mid November). Again, the first column is the median and the next two the 2.5% and 97.5% quantiles.

We can also look at the belonging of the different groups.

# One group per EMU x period combination actually present in the data
groups <- interaction(silvereel_freshwater_wide$emu_nameshort,
                      silvereel_freshwater_wide$period,
                      drop = TRUE)
group_name <- levels(groups)
  
get_pattern_month <- function(res, mydata = NULL){
  # Build the classification table of the groups (EMU x period): for each
  # group, the most frequently visited cluster over the MCMC iterations,
  # plus the number of iterations spent in each cluster.
  #
  # res:    a runjags fit object
  # mydata: unused; kept (with a default) for backward compatibility
  #
  # NOTE(review): despite its name this function returns a classification,
  # not a monthly pattern, and it relies on the globals `groups`,
  # `group_name` and `nbclus` defined just above.
  
  # group names look like "EMU.period": recover both parts
  tmp <- strsplit(as.character(group_name),
                  "\\.")
  ser <- as.character(lapply(tmp,function(tt){
    tt[1]
  }))
  period <- as.character(lapply(tmp,function(tt){
    tt[2]
  }))
  res_mat <- as.matrix(as.mcmc.list(res,add.mutate=FALSE))
  
  clus <- t(sapply(seq_len(length(unique(groups))), function(id){
    name_col <- paste("cluster[",id,"]",sep="")
    freq <- table(res_mat[,name_col])
    # cluster in which this group spends the most iterations
    max_class <- names(freq)[order(freq,decreasing=TRUE)[1]]
    c(max_class,freq[as.character(1:nbclus)])
  }))
  storage.mode(clus) <- "numeric"
  classes <- as.data.frame(clus)
  names(classes) <- c("cluster", paste("clus",seq_len(nbclus),sep=""))
  cbind.data.frame(data.frame(ser=ser, period=period),
                   classes)
}

# Classification of each EMU x period, with clusters relabelled to match
# the plotting order used above
myclassif <- get_pattern_month(myfit_silvereel_freshwater_landings)
cluster_order <- c("4", "1", "5", "2", "3")
myclassif$cluster <- factor(match(myclassif$cluster, cluster_order),
                            levels = as.character(1:7))
table_classif(myclassif)
EMU period Max cluster % clus 1 % clus 2 % clus 3 % clus 4 % clus 5
SE_Inla_F 1 1 0 1 0 97 2
SE_Inla_F 2 2 100 0 0 0 0
DE_Eide_F 1 3 0 0 0 0 100
DE_Eide_F 2 3 0 0 0 0 100
DE_Elbe_F 1 3 0 0 0 0 100
DE_Elbe_F 2 3 0 0 0 0 100
DE_Schl_F 1 3 0 0 0 0 100
DE_Schl_F 2 3 0 0 0 0 100
DE_Warn_F 2 3 0 0 0 0 100
GB_Tham_F 2 3 0 0 0 0 100
GB_Angl_F 2 4 0 100 0 0 0
GB_Humb_F 2 4 0 100 0 0 0
GB_SouE_F 2 4 0 100 0 0 0
GB_SouW_F 2 4 0 100 0 0 0
FR_Loir_F 1 5 0 0 100 0 0
FR_Loir_F 2 5 0 0 100 0 0

Once again the spatial pattern is obvious. SE_Inla changed from 1 to 2 suggesting a reduction in the fishing season.

# Map the dominant cluster of each EMU, separately for the two periods.
# Series names carry a trailing habitat suffix ("_F"); strip the last two
# characters to match emu$name_short.
myclassif_p1 <- subset(myclassif, myclassif$period == 1)
myclassif_p2 <- subset(myclassif, myclassif$period == 2)
strip_hab <- function(x) substr(x, 1, nchar(as.character(x)) - 2)
emu$cluster1 <- factor(
  myclassif_p1$cluster[match(emu$name_short, strip_hab(myclassif_p1$ser))],
  levels = 1:7)
emu$cluster2 <- factor(
  myclassif_p2$cluster[match(emu$name_short, strip_hab(myclassif_p2$ser))],
  levels = 1:7)
ggplot(data = cou) +
  geom_sf(fill = "antiquewhite") +
  geom_sf(data = emu, aes(fill = cluster1)) +
  scale_fill_manual(values = cols) +
  theme_bw() + xlim(-20, 30) + ylim(35, 65)

ggplot(data = cou) +
  geom_sf(fill = "antiquewhite") +
  geom_sf(data = emu, aes(fill = cluster2)) +
  scale_fill_manual(values = cols) +
  theme_bw() + xlim(-20, 30) + ylim(35, 65)

Exporting pattern per group

# Export the median monthly pattern of each group (EMU x period)
tmp <- as.matrix(as.mcmc.list(myfit_silvereel_freshwater_landings))
name_col <- colnames(tmp)

pattern_Sfresh_landings <- do.call(
  "rbind.data.frame",
  lapply(seq_along(levels(groups)), function(g)
    median_pattern_group(g, group_name, tmp, "Y", "landings")))
save(pattern_Sfresh_landings, file = "pattern_Sfresh_landings.rdata")

Similarity before and after 2010

# which EMUs have data in both periods?
occ <- table(unique(silvereel_freshwater_wide[, c("emu_nameshort", "period")])[, 1])
tocompare <- names(occ)[which(occ > 1)]

# Similarity between the two periods: posterior overlap of the two monthly
# patterns, i.e. the sum over months of the pointwise minimum of the two
# proportion vectors (1 = identical patterns, 0 = disjoint).
simi <- sapply(tocompare, function(s) {
  # fixed = TRUE: EMU names are literal strings, not regular expressions
  g <- grep(s, group_name, fixed = TRUE)
  esp1 <- tmp[, grep(paste("alpha_group\\[", g[1], ",", sep = ""), name_col)]
  esp2 <- tmp[, grep(paste("alpha_group\\[", g[2], ",", sep = ""), name_col)]
  quantile(apply(cbind(esp1, esp2),
                 1,
                 function(x) sum(pmin(x[1:12], x[13:24]))),
           probs = c(0.025, .5, .975))
})

similarity <- data.frame(emu = tocompare, t(simi))

table_similarity(similarity)
similarity
EMU q2.5% median q97.5%
DE_Eide_F 0.59 0.76 0.88
DE_Elbe_F 0.55 0.70 0.83
DE_Schl_F 0.60 0.74 0.86
FR_Loir_F 0.61 0.76 0.88
SE_Inla_F 0.35 0.50 0.65

Potential effect of EMP and EU closures

# Decompose group names ("CC_Emun_H.p") into their period (last character),
# EMU ("CC_Emun") and habitat type ("H", i.e. the third "_" block without
# its trailing ".p" suffix)
ncar <- nchar(group_name)
period <- as.integer(substr(as.character(group_name), ncar, ncar))
blocks <- strsplit(group_name, "_")
emus <- sapply(blocks, function(x) paste(x[1], x[2], sep = "_"))
hty_code <- sapply(blocks, function(x) substr(x[3], 1, nchar(x[3]) - 2))



####### EMP closures (applied to period-1 groups)
# Build the list of groups observed in period 1 (before 2010)
list_period1=data.frame(emu_nameshort=emus[period==1])
list_period1$group=group_name[period==1]
list_period1$id_g=match(list_period1$group,group_name)
list_period1$hty_code=hty_code[period==1]
  
# we check that we have landings data at least two years before the first EMP closures
# first condition: an EMP closure exists for this EMU / silver-eel stage /
# habitat type combination
list_period1$estimable=mapply(function(s,hty) {
  length(which(charac_EMP_closures$emu_nameshort==s 
               & grepl("S",charac_EMP_closures$lfs_code) 
               & grepl(hty, charac_EMP_closures$hty_code)))>0},
  list_period1$emu_nameshort, list_period1$hty_code)

# second condition: the series starts at least two years before the first
# EMP closure for this EMU / stage / habitat.
# NOTE(review): `group` is a global defined in another section — confirm it
# still matches the silver eel freshwater grouping at this point.
list_period1$estimable=list_period1$estimable &
(sapply(list_period1$id_g,function(e) min(silvereel_freshwater_wide$season[group==e]))+2 <
mapply(function(e,hty) min(charac_EMP_closures$year[charac_EMP_closures$emu_nameshort==e &
                                                           grepl("S",charac_EMP_closures$lfs_code) &
                                                    grepl(hty,charac_EMP_closures$hty_code)]),
       list_period1$emu_nameshort, list_period1$hty_code))

# placeholders for the loss quantiles (filled only for estimable groups)
list_period1$lossq2.5=NA
list_period1$lossq50=NA
list_period1$lossq97.5=NA

# Potential loss due to EMP closures: for each estimable group, the share of
# the annual catch that falls in closed months, weighted by the closure
# percentage (taking the maximum over overlapping closures per month).
res_closures <- mapply(function(s, g, hty) {
  emu_closures <- EMP_closures %>%
    filter(emu_nameshort==s & grepl("S",lfs_code) & grepl(hty, hty_code)) %>%
    group_by(emu_nameshort,month) %>%
    summarize(fishery_closure_percent=max(fishery_closure_percent))
  myalpha <- tmp[,paste("alpha_group[",g,",",emu_closures$month,"]",sep="")]
  if (nrow(emu_closures)>1){
    # several closed months: myalpha is a matrix (iterations x months)
    loss <- colSums(apply(myalpha,1,function(x) x*emu_closures$fishery_closure_percent/100))
  } else {
    # single closed month: myalpha is a plain vector
    loss <- myalpha*emu_closures$fishery_closure_percent/100
  }
  quantile(loss,probs=c(0.025,.5,.975))
},as.character(list_period1$emu_nameshort[list_period1$estimable]),
list_period1$id_g[list_period1$estimable],
# BUG FIX: was `list_period1$hty`, which only worked through `$` partial
# matching of the `hty_code` column (the EU block below spells it out)
list_period1$hty_code[list_period1$estimable])

list_period1[list_period1$estimable, c("lossq2.5", "lossq50","lossq97.5")] <-
  t(res_closures)

kable(list_period1[,c("emu_nameshort","lossq2.5","lossq50","lossq97.5")],
      col.names=c("emu","q2.5","median","q97.5"),
      caption="proportion of catch potentially lost because of EMP closure",
      digits=2)
proportion of catch potentially lost because of EMP closure
emu q2.5 median q97.5
DE_Eide NA NA NA
DE_Elbe NA NA NA
DE_Schl NA NA NA
FR_Loir 0.13 0.17 0.21
SE_Inla NA NA NA
####### EU closures (applied to period-2 groups)
# Build the list of groups observed in period 2 (2010 onwards)
list_period2=data.frame(emu_nameshort=emus[period==2])
list_period2$group=group_name[period==2]
list_period2$id_g=match(list_period2$group,group_name)
list_period2$hty_code=hty_code[period==2]
  
# we check that we have landings data at least two years before the first EU closures
# first condition: an EU closure exists for this EMU / silver-eel stage /
# habitat type combination
list_period2$estimable=mapply(function(s,hty) {
  length(which(charac_EU_closures$emu_nameshort==s 
               & grepl("S",charac_EU_closures$lfs_code) 
               & grepl(hty, charac_EU_closures$hty_code)))>0},
  list_period2$emu_nameshort, list_period2$hty_code)

# second condition: the series starts at least two years before the first
# EU closure for this EMU / stage / habitat
list_period2$estimable=list_period2$estimable &
(sapply(list_period2$id_g,function(e) min(silvereel_freshwater_wide$season[group==e]))+2 <
mapply(function(e,hty) min(charac_EU_closures$year[charac_EU_closures$emu_nameshort==e &
                                                           grepl("S",charac_EU_closures$lfs_code) &
                                                    grepl(hty,charac_EU_closures$hty_code)]),
       list_period2$emu_nameshort, list_period2$hty_code))

# placeholders for the loss quantiles (filled only for estimable groups)
list_period2$lossq2.5=NA
list_period2$lossq50=NA
list_period2$lossq97.5=NA

# Same computation as for the EMP closures, this time with the EU closures
# applied to the period-2 groups.
res_closures <- mapply(function(s, g, hty) {
  emu_closures <- EU_closures %>%
    filter(emu_nameshort == s & grepl("S", lfs_code) & grepl(hty, hty_code)) %>%
    group_by(emu_nameshort, month) %>%
    summarize(fishery_closure_percent = max(fishery_closure_percent))
  myalpha <- tmp[, paste("alpha_group[", g, ",", emu_closures$month, "]", sep = "")]
  if (nrow(emu_closures) > 1) {
    # several closed months: myalpha is a matrix (iterations x months)
    loss <- colSums(apply(myalpha, 1,
                          function(x) x * emu_closures$fishery_closure_percent / 100))
  } else {
    # single closed month: myalpha is a plain vector
    loss <- myalpha * emu_closures$fishery_closure_percent / 100
  }
  quantile(loss, probs = c(0.025, .5, .975))
}, as.character(list_period2$emu_nameshort[list_period2$estimable]),
list_period2$id_g[list_period2$estimable],
list_period2$hty_code[list_period2$estimable])

list_period2[list_period2$estimable, c("lossq2.5", "lossq50", "lossq97.5")] <-
  t(res_closures)

kable(list_period2[, c("emu_nameshort", "lossq2.5", "lossq50", "lossq97.5")],
      col.names = c("emu", "q2.5", "median", "q97.5"),
      caption = "proportion of catch potentially lost because of EU closure",
      digits = 2)
proportion of catch potentially lost because of EU closure
emu q2.5 median q97.5
DE_Eide NA NA NA
DE_Elbe NA NA NA
DE_Schl NA NA NA
DE_Warn NA NA NA
FR_Loir NA NA NA
GB_Angl 0.03 0.07 0.12
GB_Humb 0.03 0.11 0.22
GB_SouE 0.02 0.07 0.15
GB_SouW 0.01 0.02 0.06
GB_Tham 0.00 0.01 0.02
SE_Inla NA NA NA
# Combine the EMP (period 1) and EU (period 2) loss tables and export them
list_period2$type <- "EU closure"
list_period1$type <- "EMP closure"
list_period <- rbind.data.frame(list_period1, list_period2)
list_period$stage <- "S"
save(list_period, file = "loss_silverfresh.rdata")

Siver/Yellow

Many EMUs were not able to provide landings data in which yellow and silver eel were discriminated. In such a situation, it was impossible to decide a priori if such an EMU should be analysed with either the silver eel or the yellow eel stage. Therefore, we analysed such EMUs independently.

# Select the undifferentiated yellow+silver ("YS") landings
YS_eel <- subset(res, res$lfs_code=="YS")

# we start by removing series x years that contain only zeroes
all_zero <- YS_eel %>%  group_by(emu_nameshort,lfs_code,hty_code,das_year) %>%
        summarize(S=sum(das_value)) %>% 
    filter(S==0)

YS_eel <- YS_eel %>% 
      # explicit keys: avoids relying on dplyr's implicit common-column
      # detection (the keys match the join message printed by the original)
      anti_join(all_zero,
                by = c("das_year", "emu_nameshort", "lfs_code", "hty_code"))
## Joining, by = c("das_year", "emu_nameshort", "lfs_code", "hty_code")
# distribution of the habitat types among the YS records
table(YS_eel$hty_code)
## 
##    C    F  FTC    T   TC 
##  419  750  193 1522  373
# We have plenty of data, so drop the "FC" and "FTC" codes, which are weird
# mixes of habitat types
YS_eel <- YS_eel %>%
  filter(!(hty_code %in% c("FTC", "FC")))

# in this analysis the unit is the EMU x habitat combination: build the
# corresponding identifier, keeping the plain EMU name aside in `emu`
YS_eel$emu <- YS_eel$emu_nameshort
YS_eel$emu_nameshort <- paste(YS_eel$emu_nameshort, YS_eel$hty_code, sep = "_")

Similarly to seasonality, we will build seasons. We reuse the procedure made for silver eel and YS eel seasonality, i.e. defining seasons per EMU, with the season starting at the month with minimum landings. The month with the lowest catch fmin defines the beginning of the season (month_in_season=1) and season y stands for the 12 months from fmin y (e.g., if the lowest migration is in December, the season ranges from December to November, and season y denotes the season from December y to November y+1).

# create seasons series by series (each season starts at the month with the
# lowest landings for that series)
YSeel <- do.call("rbind.data.frame",
                 lapply(unique(YS_eel$emu_nameshort),
                        function(s)
                          season_creation(YS_eel[YS_eel$emu_nameshort == s, ])))
months_peak_per_series <- unique(YSeel[, c("emu_nameshort", "peak_month")])

# large variety in the month with peak of catches among EMU / habitat
table(months_peak_per_series$peak_month)
## 
##  1  4  5  6  7  8  9 10 11 12 
##  3  1  5  5  3  7  4  1  1  1
# season 2020 is incomplete at the time of the analysis: drop it
YSeel <- subset(YSeel, season < 2020)

Looking at the data, it seems that there are too few EMUs per habitat type, therefore we will analyse all habitats simultaneously.

# number of EMU x habitat series per habitat type
table(unique(YSeel[,c("hty_code","emu_nameshort")])$hty_code)
## 
##  C  F  T TC 
##  6  9 13  3

Data selection

Now we should carry out data selection; more specifically, we want to eliminate rows with too many missing data or too many zeros, and to check that there are no duplicates (though Cédric already did it).

# For each series, keep only the seasons with good monthly coverage and a
# non-negligible total catch (more than 50 kg of landings)
YSeel_allhab <- YSeel
kept_seasons <- lapply(unique(YSeel_allhab$emu_nameshort), function(s) {
  sub_YS <- YSeel_allhab[YSeel_allhab$emu_nameshort == s, ]
  kept <- good_coverage_wave(sub_YS)
  if (!is.null(kept)) {
    enough_catch <- sapply(kept, function(k) {
      sum(sub_YS$das_value[sub_YS$season == k], na.rm = TRUE) > 50
    })
    kept <- kept[enough_catch]
  }
  if (length(kept) == 0) kept <- NULL
  kept
})
## [1] "For  DE_Schl_C  a good season should cover months: 1 to 12"
## [1] "For  DE_Warn_F  a good season should cover months: 3 to 10"
## [1] "For  ES_Cata_T  a good season should cover months: 10 to 3"
## [1] "For  ES_Murc_C  a good season should cover months: 11 to 4"
## [1] "For  FI_total_T  a good season should cover months: 5 to 12"
## [1] "For  FR_Adou_T  a good season should cover months: 3 to 12"
## [1] "For  FR_Adou_F  a good season should cover months: 9 to 6"
## [1] "For  FR_Arto_T  a good season should cover months: 1 to 10"
## [1] "For  FR_Bret_T  a good season should cover months: 2 to 10"
## [1] "For  FR_Garo_F  a good season should cover months: 3 to 11"
## [1] "For  FR_Garo_T  a good season should cover months: 12 to 10"
## [1] "For  FR_Loir_T  a good season should cover months: 1 to 11"
## [1] "For  FR_Loir_F  a good season should cover months: 4 to 1"
## [1] "For  FR_Rhin_F  a good season should cover months: 4 to 11"
## [1] "For FR_Rhon_F not possible to define a season"
## [1] "For  FR_Rhon_T  a good season should cover months: 3 to 1"
## [1] "For  FR_Sein_T  a good season should cover months: 4 to 11"
## [1] "For  FR_Sein_F  a good season should cover months: 4 to 11"
## [1] "For  NL_total_TC  a good season should cover months: 4 to 11"
## [1] "For  NL_total_F  a good season should cover months: 4 to 11"
## [1] "For  NO_total_T  a good season should cover months: 5 to 11"
## [1] "For  PL_Oder_TC  a good season should cover months: 4 to 11"
## [1] "For  PL_Oder_C  a good season should cover months: 4 to 11"
## [1] "For  PL_Oder_T  a good season should cover months: 4 to 11"
## [1] "For  PL_Vist_TC  a good season should cover months: 4 to 11"
## [1] "For  PL_Vist_C  a good season should cover months: 4 to 11"
## [1] "For  PL_Vist_T  a good season should cover months: 4 to 11"
## [1] "For PT_Port_T not possible to define a season"
## [1] "For  SE_East_C  a good season should cover months: 6 to 12"
## [1] "For SE_Inla_F not possible to define a season"
## [1] "For  SE_West_C  a good season should cover months: 5 to 11"

Finally, here are the series kept given previous criterion.

# name the list by series and display only the series with kept seasons
names(kept_seasons) <- unique(YSeel_allhab$emu_nameshort)
kept_seasons[!sapply(kept_seasons, is.null)]
## $DE_Schl_C
## [1] 2012 2013
## 
## $DE_Warn_F
##  [1] 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008
## 
## $ES_Cata_T
##  [1] 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013
## [15] 2014 2015 2016 2017 2018
## 
## $ES_Murc_C
##  [1] 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015
## [15] 2016 2017
## 
## $FI_total_T
## [1] 2011 2012 2013 2014 2015 2016 2017 2018
## 
## $FR_Adou_T
## [1] 2000 2001 2002 2003 2004 2005 2006 2007
## 
## $FR_Arto_T
## [1] 1999 2000 2001 2002 2003 2004 2005 2006 2007
## 
## $FR_Bret_T
## [1] 1999 2000 2001 2002 2003 2004 2005 2006 2007
## 
## $FR_Garo_F
## [1] 2003 2004
## 
## $FR_Garo_T
## [1] 2000 2001 2002 2003 2004 2005 2006 2007
## 
## $FR_Loir_T
## [1] 1999 2000 2001 2002 2003 2004 2005 2006 2007
## 
## $FR_Loir_F
## [1] 2003 2004 2005 2006 2007 2008 2009
## 
## $FR_Rhin_F
## [1] 2002 2004 2005 2006
## 
## $FR_Rhon_T
## [1] 2010 2011 2012 2013 2014 2015 2016 2017
## 
## $FR_Sein_T
## [1] 2000 2001 2002 2003 2004 2005 2006 2007 2008
## 
## $FR_Sein_F
## [1] 2004
## 
## $NL_total_TC
##  [1] 2000 2001 2002 2003 2004 2005 2006 2007 2008 2016 2017
## 
## $NL_total_F
##  [1] 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014
## [15] 2015 2016 2017 2018
## 
## $NO_total_T
##  [1] 2002 2003 2004 2005 2006 2007 2008 2009 2010 2016 2017 2018
## 
## $PL_Oder_TC
## [1] 2004 2005 2006 2007 2008 2009 2010
## 
## $PL_Oder_C
## [1] 2010 2011 2012 2013
## 
## $PL_Oder_T
## [1] 2012 2013 2014 2015 2016 2017 2018
## 
## $PL_Vist_TC
## [1] 2004 2005 2006 2007 2008 2009 2010
## 
## $PL_Vist_C
## [1] 2011 2012 2013 2014 2015 2016 2017
## 
## $PL_Vist_T
## [1] 2014 2015 2016 2017 2018
## 
## $SE_East_C
## [1] 2005 2007 2008
## 
## $SE_West_C
## [1] 2000 2001 2002 2004 2006 2008

Data preparation

We carry out the same procedure as for seasonality.

# keep only the seasons retained by the coverage / catch filters above
YSeel_allhab_subset <- subset(YSeel_allhab, 
                           mapply(function(season, series){
                             season %in% kept_seasons[[series]]
                           }, YSeel_allhab$season, YSeel_allhab$emu_nameshort))


# reshape to one row per series x season, one column per month
YSeel_allhab_wide <- pivot_wider(data=YSeel_allhab_subset[, c("emu_nameshort",
                                                     "cou_code",
                                                     "season",
                                                     "das_month",
                                                     "das_value")],
                                names_from="das_month",
                                values_from="das_value")
# month columns (everything after the 3 id columns) are renamed m1..m12
names(YSeel_allhab_wide)[-(1:3)] <- paste("m",
                                       names(YSeel_allhab_wide)[-(1:3)],
                                       sep="")

### count the number of zero / missing months per row, to remove rows
### without enough fish
data_poor <- data.frame(YSeel_allhab_wide$emu_nameshort,
                        YSeel_allhab_wide$season,
                  zero=rowSums(YSeel_allhab_wide[, -(1:3)] == 0 |
                                 is.na(YSeel_allhab_wide[, -(1:3)])),
           tot=rowSums(YSeel_allhab_wide[, -(1:3)], na.rm=TRUE))
# keep rows with at least 3 informative months and more than 50 kg in total
YSeel_allhab_wide <- YSeel_allhab_wide[data_poor$zero < 10 & data_poor$tot>50, ]

# show the discarded "data poor" rows, i.e. seasons with fewer than 3
# non-zero months or a total catch of 50 kg or less
table_datapoor(data_poor %>% filter(zero > 9 | tot <50))
"data poor" situation
EMU season number of zero total catch

It leads to a dataset with 216 rows.

We now replace NA value per zero since we selected our dataseries with missing months corresponding to insignificant months / closed months, and we compute proportions per month for each year.

# Missing months correspond to closed / insignificant months, so treat them
# as zero catches.
month_cols <- paste("m", 1:12, sep = "")
YSeel_allhab_wide <- YSeel_allhab_wide %>%
  replace_na(replace = setNames(as.list(rep(0, length(month_cols))),
                                month_cols))
# small offset so that no month has a strictly null proportion
YSeel_allhab_wide[, -(1:3)] <- YSeel_allhab_wide[, -(1:3)] + 1e-3
# convert monthly catches into within-season proportions
total_catch_year <- rowSums(YSeel_allhab_wide[, month_cols])
YSeel_allhab_wide <- YSeel_allhab_wide %>%
  mutate_at(.vars = month_cols, function(x) x / total_catch_year)

The Commission asks us to compare the pattern before and after 2007, probably to see the effect of the Eel Regulation. It is therefore necessary to build a period index. However, since most countries implemented their EMPs only in 2009/2010, we split in 2010.

# Period index: 1 = seasons up to 2009 (pre-EMP), 2 = 2010 onwards
YSeel_allhab_wide$period <- ifelse(YSeel_allhab_wide$season > 2009, 2, 1)

kable(table(YSeel_allhab_wide$period,
            YSeel_allhab_wide$emu_nameshort),
      row.names = TRUE,
      caption = "number of seasons per EMU and period")
number of seasons per EMU and period
DE_Schl_C DE_Warn_F ES_Cata_T ES_Murc_C FI_total_T FR_Adou_T FR_Arto_T FR_Bret_T FR_Garo_F FR_Garo_T FR_Loir_F FR_Loir_T FR_Rhin_F FR_Rhon_T FR_Sein_F FR_Sein_T NL_total_F NL_total_TC NO_total_T PL_Oder_C PL_Oder_T PL_Oder_TC PL_Vist_C PL_Vist_T PL_Vist_TC SE_East_C SE_West_C
1 0 10 10 8 0 8 9 9 2 8 7 9 4 0 1 9 9 9 8 0 0 6 0 0 6 3 6
2 2 0 9 8 8 0 0 0 0 0 0 0 0 8 0 0 9 2 4 4 7 1 7 5 1 0 0

The situation is not well balanced. Most EMU which have data in periods 1 don’t have data in period 2 and conversely.

Running the model

# One group per EMU x period combination; y holds the monthly proportions
group <- as.integer(interaction(YSeel_allhab_wide$emu_nameshort,
                                YSeel_allhab_wide$period,
                                drop = TRUE))
nb_occ_group <- table(group)
y <- as.matrix(YSeel_allhab_wide[, paste("m", 1:12, sep = "")])

Now, we make a loop to select the number of clusters based on a DIC criterion

# Compare models with 2 to 7 clusters in parallel (FORK cluster, 3 workers).
# For each candidate number of clusters the model is refitted until JAGS
# adaptation succeeds, then we collect:
#   - a DIC-like criterion: mean(deviance) + 0.5 * var(deviance)
#   - the median silhouette of the resulting clustering
#   - the average number of clusters actually used across iterations
cl <- makeCluster(3, 'FORK')
comparison <- parSapply(cl,2:7,
       function(nbclus){
         mydata <- build_data(nbclus)
         adapted <- FALSE
         while (!adapted){
           tryCatch({
             # error out on incomplete adaptation so we retry the fit
             runjags.options(adapt.incomplete="error")
             res <- run.jags("jags_model.txt", monitor= c("deviance",
                                                          "alpha_group",
                                                          "cluster"),
                        summarise=FALSE, adapt=40000, method="parallel",
                        sample=2000,burnin=100000,n.chains=1,
                        inits=generate_init(nbclus, mydata)[[1]],
                        data=mydata)
                        adapted <- TRUE
                        res_mat <- as.matrix(as.mcmc.list(res))
                        silhouette <- median(compute_silhouette(res_mat))
                        # number of distinct clusters used in each iteration
                        nbused <- apply(res_mat, 1, function(iter){
                          length(table(iter[grep("cluster",
                                                 names(iter))]))
                        })
                        # column 1 of res_mat is the monitored deviance
                        dic <- mean(res_mat[,1])+0.5*var(res_mat[,1])
                        stats <- c(dic,silhouette,mean(nbused))
                  }, error=function(e) {
                    print(paste("not adapted, restarting nbclus",nbclus))
                    }
                  )
         }
         stats
      })
stopCluster(cl)

# Gather the selection criteria for each candidate number of clusters
# (columns of `comparison` correspond to nbclus = 2:7)
best_YSeel_allhab_landings <- data.frame(
  nbclus = seq(2, length.out = ncol(comparison)),
  dic = comparison[1, ],
  silhouette = comparison[2, ],
  used = comparison[3, ]
)
save(best_YSeel_allhab_landings, file = "YSeel_allhab_landings_jags.rdata")
load("YSeel_allhab_landings_jags.rdata")
best_YSeel_allhab_landings
##   nbclus       dic silhouette used
## 1      2 -31892.50  0.1238231    2
## 2      3 -37109.05  0.4769433    3
## 3      4 -37475.89  0.1610281    4
## 4      5 -37985.55  0.1785846    5
## 5      6 -37910.28  0.1895422    6
## 6      7 -38135.63  0.1442730    7

The number of clusters used keeps increasing; there is a good silhouette and a good DIC at 6.

# Refit the selected model (6 clusters), retrying until JAGS adaptation
# succeeds.
nbclus <- 6
mydata <- build_data(nbclus)
# Parameters to monitor. The original call listed "cluster" twice and
# "centroid" three times; duplicates are redundant, each name is kept once.
monitored <- c("cluster", "esp", "alpha_group", "centroid",
               "centroid_group", "distToClust", "duration_clus",
               "duration_group", "lambda", "id_cluster")
adapted <- FALSE
while (!adapted) {
  tryCatch({
    # fail explicitly when adaptation is incomplete so we restart rather
    # than silently keep a badly adapted sampler
    runjags.options(adapt.incomplete = "error")
    myfit_YSeel_allhab_landings <- run.jags(
      "jags_model.txt",
      monitor = monitored,
      summarise = FALSE, adapt = 20000, method = "parallel",
      sample = 10000, burnin = 200000, n.chains = 1, thin = 5,
      inits = generate_init(nbclus, mydata)[[1]], data = mydata)
    adapted <- TRUE
  }, error = function(e) {
    print(paste("not adapted, restarting nbclus", nbclus))
  })
}


save(myfit_YSeel_allhab_landings, best_YSeel_allhab_landings,
     file = "YSeel_allhab_landings_jags.rdata")

Results

Once fitted, we can plot monthly pattern per cluster

# Reload the saved fit and rebuild the data used for the 6-cluster model
load("YSeel_allhab_landings_jags.rdata")
nbclus <- 6
mydata <- build_data(nbclus)
get_pattern_month <- function(res,type="cluster"){
  # Extract the monthly pattern (the "esp" parameters) from a runjags fit
  # and reshape it into a long data.frame with columns
  # param / proportion / cluster / month.
  #
  # res:  a runjags fit object
  # type: kind of pattern to extract; only "cluster" is implemented
  res_mat <- as.matrix(as.mcmc.list(res, add.mutate=FALSE))
  if (type=="cluster"){
    sub_mat <- as.data.frame(res_mat[,grep("esp",colnames(res_mat))])
  } else {
    # BUG FIX: the original silently left sub_mat undefined for any other
    # type, which raised an obscure "object not found" error below;
    # fail early with a clear message instead
    stop("get_pattern_month: unsupported type '", type, "'")
  }
  sub_mat <- sub_mat %>% 
    pivot_longer(cols=1:ncol(sub_mat),
                 names_to="param",
                 values_to="proportion")
  # parameter names look like esp[cluster,month]: split on punctuation to
  # recover the cluster and month indices
  tmp <- lapply(as.character(sub_mat$param),function(p) strsplit(p,"[[:punct:]]"))
  sub_mat$cluster<-as.factor(
    as.integer(lapply(tmp, function(tt) tt[[1]][2])))
  sub_mat$month <- as.character(lapply(tmp,
                                       function(tt) paste("m",
                                                          tt[[1]][3],
                                                          sep="")))
  # order the month factor m1..m12 (m1 = first month of the season)
  sub_mat$month <- factor(sub_mat$month, levels=paste("m", 1:12, sep=""))
  sub_mat
}

# Relabel the raw cluster ids for display, then plot the monthly pattern
# per cluster.
pat <- get_pattern_month(myfit_YSeel_allhab_landings)
pat$cluster <- factor(match(pat$cluster, c("3", "6", "4", "2", "1", "5")),
                      levels = as.character(1:7))
ggplot(pat, aes(x = month, y = proportion)) +
  geom_boxplot(aes(fill = cluster), outlier.shape = NA) +
  scale_fill_manual(values = cols) +
  facet_wrap(. ~ cluster, ncol = 1) +
  # BUG FIX: theme_bw() was on its own line without a preceding "+", so the
  # theme object was printed on its own (see the "List of 65" output below)
  # instead of being applied to the plot
  theme_bw()
## List of 65
##  $ line                      :List of 6
##   ..$ colour       : chr "black"
##   ..$ size         : num 0.5
##   ..$ linetype     : num 1
##   ..$ lineend      : chr "butt"
##   ..$ arrow        : logi FALSE
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_line" "element"
##  $ rect                      :List of 5
##   ..$ fill         : chr "white"
##   ..$ colour       : chr "black"
##   ..$ size         : num 0.5
##   ..$ linetype     : num 1
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_rect" "element"
##  $ text                      :List of 11
##   ..$ family       : chr ""
##   ..$ face         : chr "plain"
##   ..$ colour       : chr "black"
##   ..$ size         : num 11
##   ..$ hjust        : num 0.5
##   ..$ vjust        : num 0.5
##   ..$ angle        : num 0
##   ..$ lineheight   : num 0.9
##   ..$ margin       : 'margin' num [1:4] 0pt 0pt 0pt 0pt
##   .. ..- attr(*, "valid.unit")= int 8
##   .. ..- attr(*, "unit")= chr "pt"
##   ..$ debug        : logi FALSE
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ axis.title.x              :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : NULL
##   ..$ vjust        : num 1
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : 'margin' num [1:4] 2.75pt 0pt 0pt 0pt
##   .. ..- attr(*, "valid.unit")= int 8
##   .. ..- attr(*, "unit")= chr "pt"
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ axis.title.x.top          :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : NULL
##   ..$ vjust        : num 0
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : 'margin' num [1:4] 0pt 0pt 2.75pt 0pt
##   .. ..- attr(*, "valid.unit")= int 8
##   .. ..- attr(*, "unit")= chr "pt"
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ axis.title.y              :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : NULL
##   ..$ vjust        : num 1
##   ..$ angle        : num 90
##   ..$ lineheight   : NULL
##   ..$ margin       : 'margin' num [1:4] 0pt 2.75pt 0pt 0pt
##   .. ..- attr(*, "valid.unit")= int 8
##   .. ..- attr(*, "unit")= chr "pt"
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ axis.title.y.right        :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : NULL
##   ..$ vjust        : num 0
##   ..$ angle        : num -90
##   ..$ lineheight   : NULL
##   ..$ margin       : 'margin' num [1:4] 0pt 0pt 0pt 2.75pt
##   .. ..- attr(*, "valid.unit")= int 8
##   .. ..- attr(*, "unit")= chr "pt"
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ axis.text                 :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : chr "grey30"
##   ..$ size         : 'rel' num 0.8
##   ..$ hjust        : NULL
##   ..$ vjust        : NULL
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : NULL
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ axis.text.x               :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : NULL
##   ..$ vjust        : num 1
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : 'margin' num [1:4] 2.2pt 0pt 0pt 0pt
##   .. ..- attr(*, "valid.unit")= int 8
##   .. ..- attr(*, "unit")= chr "pt"
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ axis.text.x.top           :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : NULL
##   ..$ vjust        : num 0
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : 'margin' num [1:4] 0pt 0pt 2.2pt 0pt
##   .. ..- attr(*, "valid.unit")= int 8
##   .. ..- attr(*, "unit")= chr "pt"
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ axis.text.y               :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : num 1
##   ..$ vjust        : NULL
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : 'margin' num [1:4] 0pt 2.2pt 0pt 0pt
##   .. ..- attr(*, "valid.unit")= int 8
##   .. ..- attr(*, "unit")= chr "pt"
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ axis.text.y.right         :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : num 0
##   ..$ vjust        : NULL
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : 'margin' num [1:4] 0pt 0pt 0pt 2.2pt
##   .. ..- attr(*, "valid.unit")= int 8
##   .. ..- attr(*, "unit")= chr "pt"
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ axis.ticks                :List of 6
##   ..$ colour       : chr "grey20"
##   ..$ size         : NULL
##   ..$ linetype     : NULL
##   ..$ lineend      : NULL
##   ..$ arrow        : logi FALSE
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_line" "element"
##  $ axis.ticks.length         : 'unit' num 2.75pt
##   ..- attr(*, "valid.unit")= int 8
##   ..- attr(*, "unit")= chr "pt"
##  $ axis.ticks.length.x       : NULL
##  $ axis.ticks.length.x.top   : NULL
##  $ axis.ticks.length.x.bottom: NULL
##  $ axis.ticks.length.y       : NULL
##  $ axis.ticks.length.y.left  : NULL
##  $ axis.ticks.length.y.right : NULL
##  $ axis.line                 : list()
##   ..- attr(*, "class")= chr [1:2] "element_blank" "element"
##  $ axis.line.x               : NULL
##  $ axis.line.y               : NULL
##  $ legend.background         :List of 5
##   ..$ fill         : NULL
##   ..$ colour       : logi NA
##   ..$ size         : NULL
##   ..$ linetype     : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_rect" "element"
##  $ legend.margin             : 'margin' num [1:4] 5.5pt 5.5pt 5.5pt 5.5pt
##   ..- attr(*, "valid.unit")= int 8
##   ..- attr(*, "unit")= chr "pt"
##  $ legend.spacing            : 'unit' num 11pt
##   ..- attr(*, "valid.unit")= int 8
##   ..- attr(*, "unit")= chr "pt"
##  $ legend.spacing.x          : NULL
##  $ legend.spacing.y          : NULL
##  $ legend.key                :List of 5
##   ..$ fill         : chr "white"
##   ..$ colour       : logi NA
##   ..$ size         : NULL
##   ..$ linetype     : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_rect" "element"
##  $ legend.key.size           : 'unit' num 1.2lines
##   ..- attr(*, "valid.unit")= int 3
##   ..- attr(*, "unit")= chr "lines"
##  $ legend.key.height         : NULL
##  $ legend.key.width          : NULL
##  $ legend.text               :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : 'rel' num 0.8
##   ..$ hjust        : NULL
##   ..$ vjust        : NULL
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : NULL
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ legend.text.align         : NULL
##  $ legend.title              :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : num 0
##   ..$ vjust        : NULL
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : NULL
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ legend.title.align        : NULL
##  $ legend.position           : chr "right"
##  $ legend.direction          : NULL
##  $ legend.justification      : chr "center"
##  $ legend.box                : NULL
##  $ legend.box.margin         : 'margin' num [1:4] 0cm 0cm 0cm 0cm
##   ..- attr(*, "valid.unit")= int 1
##   ..- attr(*, "unit")= chr "cm"
##  $ legend.box.background     : list()
##   ..- attr(*, "class")= chr [1:2] "element_blank" "element"
##  $ legend.box.spacing        : 'unit' num 11pt
##   ..- attr(*, "valid.unit")= int 8
##   ..- attr(*, "unit")= chr "pt"
##  $ panel.background          :List of 5
##   ..$ fill         : chr "white"
##   ..$ colour       : logi NA
##   ..$ size         : NULL
##   ..$ linetype     : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_rect" "element"
##  $ panel.border              :List of 5
##   ..$ fill         : logi NA
##   ..$ colour       : chr "grey20"
##   ..$ size         : NULL
##   ..$ linetype     : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_rect" "element"
##  $ panel.spacing             : 'unit' num 5.5pt
##   ..- attr(*, "valid.unit")= int 8
##   ..- attr(*, "unit")= chr "pt"
##  $ panel.spacing.x           : NULL
##  $ panel.spacing.y           : NULL
##  $ panel.grid                :List of 6
##   ..$ colour       : chr "grey92"
##   ..$ size         : NULL
##   ..$ linetype     : NULL
##   ..$ lineend      : NULL
##   ..$ arrow        : logi FALSE
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_line" "element"
##  $ panel.grid.minor          :List of 6
##   ..$ colour       : NULL
##   ..$ size         : 'rel' num 0.5
##   ..$ linetype     : NULL
##   ..$ lineend      : NULL
##   ..$ arrow        : logi FALSE
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_line" "element"
##  $ panel.ontop               : logi FALSE
##  $ plot.background           :List of 5
##   ..$ fill         : NULL
##   ..$ colour       : chr "white"
##   ..$ size         : NULL
##   ..$ linetype     : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_rect" "element"
##  $ plot.title                :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : 'rel' num 1.2
##   ..$ hjust        : num 0
##   ..$ vjust        : num 1
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : 'margin' num [1:4] 0pt 0pt 5.5pt 0pt
##   .. ..- attr(*, "valid.unit")= int 8
##   .. ..- attr(*, "unit")= chr "pt"
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ plot.subtitle             :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : num 0
##   ..$ vjust        : num 1
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : 'margin' num [1:4] 0pt 0pt 5.5pt 0pt
##   .. ..- attr(*, "valid.unit")= int 8
##   .. ..- attr(*, "unit")= chr "pt"
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ plot.caption              :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : 'rel' num 0.8
##   ..$ hjust        : num 1
##   ..$ vjust        : num 1
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : 'margin' num [1:4] 5.5pt 0pt 0pt 0pt
##   .. ..- attr(*, "valid.unit")= int 8
##   .. ..- attr(*, "unit")= chr "pt"
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ plot.tag                  :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : 'rel' num 1.2
##   ..$ hjust        : num 0.5
##   ..$ vjust        : num 0.5
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : NULL
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ plot.tag.position         : chr "topleft"
##  $ plot.margin               : 'margin' num [1:4] 5.5pt 5.5pt 5.5pt 5.5pt
##   ..- attr(*, "valid.unit")= int 8
##   ..- attr(*, "unit")= chr "pt"
##  $ strip.background          :List of 5
##   ..$ fill         : chr "grey85"
##   ..$ colour       : chr "grey20"
##   ..$ size         : NULL
##   ..$ linetype     : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_rect" "element"
##  $ strip.placement           : chr "inside"
##  $ strip.text                :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : chr "grey10"
##   ..$ size         : 'rel' num 0.8
##   ..$ hjust        : NULL
##   ..$ vjust        : NULL
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : 'margin' num [1:4] 4.4pt 4.4pt 4.4pt 4.4pt
##   .. ..- attr(*, "valid.unit")= int 8
##   .. ..- attr(*, "unit")= chr "pt"
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ strip.text.x              : NULL
##  $ strip.text.y              :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : NULL
##   ..$ vjust        : NULL
##   ..$ angle        : num -90
##   ..$ lineheight   : NULL
##   ..$ margin       : NULL
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ strip.switch.pad.grid     : 'unit' num 2.75pt
##   ..- attr(*, "valid.unit")= int 8
##   ..- attr(*, "unit")= chr "pt"
##  $ strip.switch.pad.wrap     : 'unit' num 2.75pt
##   ..- attr(*, "valid.unit")= int 8
##   ..- attr(*, "unit")= chr "pt"
##  - attr(*, "class")= chr [1:2] "theme" "gg"
##  - attr(*, "complete")= logi TRUE
##  - attr(*, "validate")= logi TRUE

Cluster 5 peaks in autumn and winter, 6 is similar but shifted 1 month later. Clusters 1 and 2 are widespread with a peak in spring/early summer and a second one in autumn. Cluster 4 is located in autumn only and cluster 3 in summer.

We compute some statistics to characterize the clusters.

table_characteristics(myfit_YSeel_allhab_landings, 6)
characteristics of clusters
number of months to reach 80% of total
month of centroid
cluster median q2.5% q97.5% median q2.5% q97.5%
1 4 4 4 0.26 0.13 0.39
2 3 3 3 9.83 9.65 10.01
3 8 8 9 6.32 6.16 6.49
4 5 5 6 7.70 7.61 7.80
5 4 4 4 1.30 1.16 1.43
6 6 6 6 7.37 7.26 7.49

Duration indicates the minimum number of months that covers 80% of the wave (the first column is the median, and the next two are the 2.5% and 97.5% quantiles of the credibility interval). Centroid is the centroid of the migration wave (e.g. 11.5 would indicate a migration centred around mid-November). The first column is the median and the next two are the 2.5% and 97.5% quantiles.

We can also look at the cluster membership of the different groups.

# One group per EMU x period combination; drop=TRUE removes unused
# combinations so that group indices line up with the model's
# cluster[] indexing used below.
groups <- interaction(YSeel_allhab_wide$emu_nameshort,
                                            YSeel_allhab_wide$period,
                                            drop=TRUE)
# Group labels have the form "<emu_nameshort>.<period>".
group_name <- levels(groups)

#' Summarise the MCMC cluster assignment of each EMU x period group.
#'
#' For every group, tabulates the posterior samples of `cluster[id]` and
#' reports the modal cluster together with the sample counts per cluster.
#'
#' @param res a fitted model object convertible with `as.mcmc.list()`.
#' @param mydata unused; kept (with a default) for backward compatibility,
#'   since existing call sites pass a single argument.
#' @return a data.frame with columns `ser`, `period`, `cluster` (modal
#'   cluster) and `clus1`..`clus<nbclus>` (posterior sample counts).
#'
#' NOTE(review): relies on the globals `group_name`, `groups` and `nbclus`
#' defined earlier in the document -- ideally these would be arguments.
get_pattern_month <- function(res, mydata = NULL){

  # group labels are "<ser>.<period>"; split them back into components
  tmp <- strsplit(as.character(group_name), "\\.")
  ser <- vapply(tmp, function(tt) tt[1], character(1))
  period <- vapply(tmp, function(tt) tt[2], character(1))
  res_mat <- as.matrix(as.mcmc.list(res, add.mutate = FALSE))

  # one row per group: modal cluster followed by per-cluster sample counts
  clus <- t(sapply(seq_along(unique(groups)), function(id){
    name_col <- paste0("cluster[", id, "]")
    freq <- table(res_mat[, name_col])
    max_class <- names(which.max(freq))
    c(max_class, freq[as.character(seq_len(nbclus))])
  }))
  storage.mode(clus) <- "numeric"
  classes <- as.data.frame(clus)
  names(classes) <- c("cluster", paste0("clus", seq_len(nbclus)))
  cbind.data.frame(data.frame(ser = ser, period = period),
                   classes)
}

myclassif <- get_pattern_month(myfit_YSeel_allhab_landings)
# Relabel clusters for presentation: original cluster 3 -> 1, 6 -> 2,
# 4 -> 3, 2 -> 4, 1 -> 5, 5 -> 6. Levels are kept at 1:7 so that map
# colours stay comparable across figures.
myclassif$cluster <- factor(match(myclassif$cluster, c("3","6","4","2","1","5")),
                            levels=as.character(1:7))

table_classif(myclassif)
EMU period Max cluster % clus 1 % clus 2 % clus 3 % clus 4 % clus 5 % clus 6
FR_Arto_T 1 1 0 0 100 0 0 0
FR_Bret_T 1 1 0 0 100 0 0 0
FR_Garo_T 1 1 0 0 100 0 0 0
FR_Loir_F 1 1 0 0 100 0 0 0
FR_Loir_T 1 1 0 0 100 0 0 0
FR_Rhon_T 2 1 0 0 100 0 0 0
DE_Warn_F 1 2 0 0 0 0 0 100
FR_Adou_T 1 2 0 0 0 0 0 100
FR_Garo_F 1 2 0 0 0 47 0 53
FR_Rhin_F 1 2 0 0 0 0 0 100
FR_Sein_F 1 2 0 0 0 3 0 97
NL_total_TC 1 2 0 0 0 5 0 95
PL_Oder_C 2 2 0 0 0 0 0 100
PL_Oder_T 2 2 0 0 0 0 0 100
PL_Oder_TC 1 2 0 0 0 0 0 100
PL_Oder_TC 2 2 0 0 0 1 0 99
PL_Vist_C 2 2 0 0 0 0 0 100
PL_Vist_T 2 2 0 0 0 0 0 100
PL_Vist_TC 1 2 0 0 0 0 0 100
PL_Vist_TC 2 2 0 0 0 7 0 93
FI_total_T 2 3 0 0 0 100 0 0
FR_Sein_T 1 3 0 0 0 100 0 0
NL_total_F 1 3 0 0 0 100 0 0
NL_total_F 2 3 0 0 0 100 0 0
NL_total_TC 2 3 0 0 0 76 0 24
NO_total_T 1 3 0 0 0 100 0 0
SE_East_C 1 3 0 0 0 100 0 0
SE_West_C 1 3 0 0 0 100 0 0
DE_Schl_C 2 4 0 100 0 0 0 0
NO_total_T 2 4 0 100 0 0 0 0
ES_Cata_T 1 5 100 0 0 0 0 0
ES_Cata_T 2 5 100 0 0 0 0 0
ES_Murc_C 1 6 0 0 0 0 100 0
ES_Murc_C 2 6 0 0 0 0 100 0

Cluster 6 corresponds only to ES_Murc and cluster 5 to ES_Cata and FR_Cors. Cluster 1 corresponds to many French EMUs in transitional waters, and clusters 2 and 3 are diverse.

# For each habitat type, map the cluster of each EMU for periods 1 and 2.
myplots <- lapply(c("TC", "C", "T", "F"), function(hty){
  # classified series for this habitat type, split by period
  myclassif_p1 <- subset(myclassif, myclassif$period == 1 &
                           endsWith(as.character(myclassif$ser),
                                    hty))
  myclassif_p2 <- subset(myclassif, myclassif$period == 2 &
                           endsWith(as.character(myclassif$ser),
                                    hty))
  # Attach the cluster labels to the EMU polygons; the "_<hty>" suffix is
  # stripped from the series name to recover the EMU short name.
  emu$cluster1 <- factor(
    myclassif_p1$cluster[match(emu$name_short,
                               gsub(paste("_", hty, sep = ""), "",
                                    as.character(myclassif_p1$ser)))],
    levels = 1:7)
  # BUG FIX: period-2 clusters were previously matched against the
  # period-1 series names (myclassif_p1$ser), mislabelling the second map.
  emu$cluster2 <- factor(
    myclassif_p2$cluster[match(emu$name_short,
                               gsub(paste("_", hty, sep = ""), "",
                                    as.character(myclassif_p2$ser)))],
    levels = 1:7)
  p1 <- ggplot(data = cou) +  geom_sf(fill= "antiquewhite") +
          geom_sf(data=emu,aes(fill=cluster1)) + scale_fill_manual(values=cols)+
      theme_bw() +xlim(-20,30) + ylim(35,65) +
    ggtitle(paste("period 1",hty))
  p2 <- ggplot(data = cou) +  geom_sf(fill= "antiquewhite") +
          geom_sf(data=emu,aes(fill=cluster2)) + scale_fill_manual(values=cols)+
    theme_bw() +xlim(-20,30) + ylim(35,65)  +
    ggtitle(paste("period 2",hty))
  return(list(p1,p2))
})
myplots <- do.call(c, myplots)
print(myplots[[1]][[1]])
## Simple feature collection with 54 features and 1 field
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -31.26575 ymin: 32.39748 xmax: 69.07032 ymax: 81.85737
## epsg (SRID):    4326
## proj4string:    +proj=longlat +datum=WGS84 +no_defs
## First 10 features:
##                  NAME                       geometry
## 1             Albania MULTIPOLYGON (((19.50115 40...
## 2             Andorra MULTIPOLYGON (((1.439922 42...
## 3             Austria MULTIPOLYGON (((16 48.77775...
## 4             Belgium MULTIPOLYGON (((5 49.79374,...
## 5  Bosnia Herzegovina MULTIPOLYGON (((19.22947 43...
## 6             Croatia MULTIPOLYGON (((14.30038 44...
## 7      Czech Republic MULTIPOLYGON (((14.82523 50...
## 8             Denmark MULTIPOLYGON (((11.99978 54...
## 9             Estonia MULTIPOLYGON (((23.97511 58...
## 10            Finland MULTIPOLYGON (((22.0731 60....
print(myplots[[1]][[2]])
## [[1]]
## mapping:  
## geom_sf: na.rm = FALSE, legend = polygon
## stat_sf: na.rm = FALSE
## position_identity 
## 
## [[2]]
## mapping: fill = ~cluster1 
## geom_sf: na.rm = FALSE, legend = polygon
## stat_sf: na.rm = FALSE
## position_identity
print(myplots[[2]][[1]])
## Simple feature collection with 54 features and 1 field
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -31.26575 ymin: 32.39748 xmax: 69.07032 ymax: 81.85737
## epsg (SRID):    4326
## proj4string:    +proj=longlat +datum=WGS84 +no_defs
## First 10 features:
##                  NAME                       geometry
## 1             Albania MULTIPOLYGON (((19.50115 40...
## 2             Andorra MULTIPOLYGON (((1.439922 42...
## 3             Austria MULTIPOLYGON (((16 48.77775...
## 4             Belgium MULTIPOLYGON (((5 49.79374,...
## 5  Bosnia Herzegovina MULTIPOLYGON (((19.22947 43...
## 6             Croatia MULTIPOLYGON (((14.30038 44...
## 7      Czech Republic MULTIPOLYGON (((14.82523 50...
## 8             Denmark MULTIPOLYGON (((11.99978 54...
## 9             Estonia MULTIPOLYGON (((23.97511 58...
## 10            Finland MULTIPOLYGON (((22.0731 60....
print(myplots[[2]][[2]])
## [[1]]
## mapping:  
## geom_sf: na.rm = FALSE, legend = polygon
## stat_sf: na.rm = FALSE
## position_identity 
## 
## [[2]]
## mapping: fill = ~cluster2 
## geom_sf: na.rm = FALSE, legend = polygon
## stat_sf: na.rm = FALSE
## position_identity
print(myplots[[3]][[1]])
## Simple feature collection with 54 features and 1 field
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -31.26575 ymin: 32.39748 xmax: 69.07032 ymax: 81.85737
## epsg (SRID):    4326
## proj4string:    +proj=longlat +datum=WGS84 +no_defs
## First 10 features:
##                  NAME                       geometry
## 1             Albania MULTIPOLYGON (((19.50115 40...
## 2             Andorra MULTIPOLYGON (((1.439922 42...
## 3             Austria MULTIPOLYGON (((16 48.77775...
## 4             Belgium MULTIPOLYGON (((5 49.79374,...
## 5  Bosnia Herzegovina MULTIPOLYGON (((19.22947 43...
## 6             Croatia MULTIPOLYGON (((14.30038 44...
## 7      Czech Republic MULTIPOLYGON (((14.82523 50...
## 8             Denmark MULTIPOLYGON (((11.99978 54...
## 9             Estonia MULTIPOLYGON (((23.97511 58...
## 10            Finland MULTIPOLYGON (((22.0731 60....
print(myplots[[3]][[2]])
## [[1]]
## mapping:  
## geom_sf: na.rm = FALSE, legend = polygon
## stat_sf: na.rm = FALSE
## position_identity 
## 
## [[2]]
## mapping: fill = ~cluster1 
## geom_sf: na.rm = FALSE, legend = polygon
## stat_sf: na.rm = FALSE
## position_identity
print(myplots[[4]][[1]])
## Simple feature collection with 54 features and 1 field
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -31.26575 ymin: 32.39748 xmax: 69.07032 ymax: 81.85737
## epsg (SRID):    4326
## proj4string:    +proj=longlat +datum=WGS84 +no_defs
## First 10 features:
##                  NAME                       geometry
## 1             Albania MULTIPOLYGON (((19.50115 40...
## 2             Andorra MULTIPOLYGON (((1.439922 42...
## 3             Austria MULTIPOLYGON (((16 48.77775...
## 4             Belgium MULTIPOLYGON (((5 49.79374,...
## 5  Bosnia Herzegovina MULTIPOLYGON (((19.22947 43...
## 6             Croatia MULTIPOLYGON (((14.30038 44...
## 7      Czech Republic MULTIPOLYGON (((14.82523 50...
## 8             Denmark MULTIPOLYGON (((11.99978 54...
## 9             Estonia MULTIPOLYGON (((23.97511 58...
## 10            Finland MULTIPOLYGON (((22.0731 60....
print(myplots[[4]][[2]])
## [[1]]
## mapping:  
## geom_sf: na.rm = FALSE, legend = polygon
## stat_sf: na.rm = FALSE
## position_identity 
## 
## [[2]]
## mapping: fill = ~cluster2 
## geom_sf: na.rm = FALSE, legend = polygon
## stat_sf: na.rm = FALSE
## position_identity

Exporting pattern per group

# Posterior draws as a plain matrix; column names index groups/clusters.
tmp <- as.matrix(as.mcmc.list(myfit_YSeel_allhab_landings))
name_col <- colnames(tmp)

# Median monthly pattern per group (one row per group x month), exported
# for reuse in other documents.
pattern_YS_landings <- do.call(
  "rbind.data.frame",
  lapply(seq_along(levels(groups)), function(g)
    median_pattern_group(g, group_name, tmp, "YS", "landings")))
save(pattern_YS_landings, file = "pattern_YS_landings.rdata")

Similarity before and after 2010

# Which EMU series have data in both periods (appear more than once).
occ <- table(unique(YSeel_allhab_wide[, c("emu_nameshort", "period")])[, 1])
tocompare <- names(occ)[which(occ > 1)]

# Similarity between the two periods of the same series: expected overlap
# (sum of pointwise minima) of the two monthly profiles, summarised by
# posterior quantiles.
simi <- sapply(tocompare, function(s){
  # BUG FIX: anchor the pattern to the full "<emu>.<period>" label so a
  # series name that is a prefix of another (e.g. "PL_Oder_T" vs
  # "PL_Oder_TC") cannot match the wrong groups.
  g <- grep(paste0("^", s, "\\."), group_name)
  esp1 <- tmp[, grep(paste0("alpha_group\\[", g[1], ","), name_col)]
  esp2 <- tmp[, grep(paste0("alpha_group\\[", g[2], ","), name_col)]
  quantile(apply(cbind(esp1, esp2),
                 1,
                 function(x) sum(pmin(x[1:12], x[13:24]))),
           probs = c(0.025, .5, .975))
})

similarity <- data.frame(emu = tocompare, t(simi))

table_similarity(similarity)
similarity
EMU q2.5% median q97.5%
ES_Cata_T 0.80 0.88 0.95
ES_Murc_C 0.68 0.77 0.86
NL_total_F 0.65 0.73 0.80
NL_total_TC 0.66 0.77 0.87
NO_total_T 0.41 0.50 0.59
PL_Oder_TC 0.60 0.75 0.86
PL_Vist_TC 0.60 0.75 0.86